@@ -29,7 +29,7 @@ class RequestEncodingTests(unittest.TestCase):
2929
3030 def test_bom (self ):
3131 # cjk water character in unicode
32- water_unicode = u '\u6C34 '
32+ water_unicode = '\u6C34 '
3333 # BOM + water character encoded
3434 utf16be = b'\xfe \xff \x6c \x34 '
3535 utf16le = b'\xff \xfe \x34 \x6c '
@@ -69,19 +69,19 @@ def test_html_body_declared_encoding(self):
6969
7070 def test_html_body_declared_encoding_unicode (self ):
7171 # html_body_declared_encoding should work when unicode body is passed
72- self .assertEqual (None , html_body_declared_encoding (u "something else" ))
72+ self .assertEqual (None , html_body_declared_encoding ("something else" ))
7373
7474 for fragment in self .utf8_fragments :
7575 encoding = html_body_declared_encoding (fragment .decode ('utf8' ))
7676 self .assertEqual (encoding , 'utf-8' , fragment )
7777
78- self .assertEqual (None , html_body_declared_encoding (u """
78+ self .assertEqual (None , html_body_declared_encoding ("""
7979 <head></head><body>
8080 this isn't searched
8181 <meta charset="utf-8">
8282 """ ))
8383 self .assertEqual (None , html_body_declared_encoding (
84- u """<meta http-equiv="Fake-Content-Type-Header" content="text/html; charset=utf-8">""" ))
84+ """<meta http-equiv="Fake-Content-Type-Header" content="text/html; charset=utf-8">""" ))
8585
8686
8787class CodecsEncodingTestCase (unittest .TestCase ):
@@ -95,10 +95,10 @@ def test_resolve_encoding(self):
9595class UnicodeDecodingTestCase (unittest .TestCase ):
9696
9797 def test_utf8 (self ):
98- self .assertEqual (to_unicode (b'\xc2 \xa3 ' , 'utf-8' ), u '\xa3 ' )
98+ self .assertEqual (to_unicode (b'\xc2 \xa3 ' , 'utf-8' ), '\xa3 ' )
9999
100100 def test_invalid_utf8 (self ):
101- self .assertEqual (to_unicode (b'\xc2 \xc2 \xa3 ' , 'utf-8' ), u '\ufffd \xa3 ' )
101+ self .assertEqual (to_unicode (b'\xc2 \xc2 \xa3 ' , 'utf-8' ), '\ufffd \xa3 ' )
102102
103103
104104def ct (charset ):
@@ -110,7 +110,7 @@ def norm_encoding(enc):
110110class HtmlConversionTests (unittest .TestCase ):
111111
112112 def test_unicode_body (self ):
113- unicode_string = u '\u043a \u0438 \u0440 \u0438 \u043b \u043b \u0438 \u0447 \u0435 \u0441 \u043a \u0438 \u0439 \u0442 \u0435 \u043a \u0441 \u0442 '
113+ unicode_string = '\u043a \u0438 \u0440 \u0438 \u043b \u043b \u0438 \u0447 \u0435 \u0441 \u043a \u0438 \u0439 \u0442 \u0435 \u043a \u0441 \u0442 '
114114 original_string = unicode_string .encode ('cp1251' )
115115 encoding , body_unicode = html_to_unicode (ct ('cp1251' ), original_string )
116116 # check body_as_unicode
@@ -137,23 +137,23 @@ def test_content_type_and_conversion(self):
137137 """Test content type header is interpreted and text converted as
138138 expected
139139 """
140- self ._assert_encoding ('utf-8' , b"\xc2 \xa3 " , 'utf-8' , u "\xa3 " )
140+ self ._assert_encoding ('utf-8' , b"\xc2 \xa3 " , 'utf-8' , "\xa3 " )
141141 # something like this in the scrapy tests - but that's invalid?
142- # self._assert_encoding('', "\xa3", 'utf-8', u "\xa3")
142+ # self._assert_encoding('', "\xa3", 'utf-8', "\xa3")
143143 # iso-8859-1 is overridden to cp1252
144- self ._assert_encoding ('iso-8859-1' , b"\xa3 " , 'cp1252' , u "\xa3 " )
145- self ._assert_encoding ('' , b"\xc2 \xa3 " , 'utf-8' , u "\xa3 " )
146- self ._assert_encoding ('none' , b"\xc2 \xa3 " , 'utf-8' , u "\xa3 " )
147- self ._assert_encoding ('gb2312' , b"\xa8 D" , 'gb18030' , u "\u2015 " )
148- self ._assert_encoding ('gbk' , b"\xa8 D" , 'gb18030' , u "\u2015 " )
149- self ._assert_encoding ('big5' , b"\xf9 \xda " , 'big5hkscs' , u "\u6052 " )
144+ self ._assert_encoding ('iso-8859-1' , b"\xa3 " , 'cp1252' , "\xa3 " )
145+ self ._assert_encoding ('' , b"\xc2 \xa3 " , 'utf-8' , "\xa3 " )
146+ self ._assert_encoding ('none' , b"\xc2 \xa3 " , 'utf-8' , "\xa3 " )
147+ self ._assert_encoding ('gb2312' , b"\xa8 D" , 'gb18030' , "\u2015 " )
148+ self ._assert_encoding ('gbk' , b"\xa8 D" , 'gb18030' , "\u2015 " )
149+ self ._assert_encoding ('big5' , b"\xf9 \xda " , 'big5hkscs' , "\u6052 " )
150150
151151 def test_invalid_utf8_encoded_body_with_valid_utf8_BOM (self ):
152152 # unlike scrapy, the BOM is stripped
153153 self ._assert_encoding ('utf-8' , b"\xef \xbb \xbf WORD\xe3 \xab WORD2" ,
154- 'utf-8' , u 'WORD\ufffd WORD2' )
154+ 'utf-8' , 'WORD\ufffd WORD2' )
155155 self ._assert_encoding (None , b"\xef \xbb \xbf WORD\xe3 \xab WORD2" ,
156- 'utf-8' , u 'WORD\ufffd WORD2' )
156+ 'utf-8' , 'WORD\ufffd WORD2' )
157157
158158 def test_utf8_unexpected_end_of_data_with_valid_utf8_BOM (self ):
159159 # Python implementations handle unexpected end of UTF8 data
@@ -163,24 +163,24 @@ def test_utf8_unexpected_end_of_data_with_valid_utf8_BOM(self):
163163
164164 # unlike scrapy, the BOM is stripped
165165 self ._assert_encoding ('utf-8' , b"\xef \xbb \xbf WORD\xe3 \xab " ,
166- 'utf-8' , [u 'WORD\ufffd \ufffd ' , u 'WORD\ufffd ' ])
166+ 'utf-8' , ['WORD\ufffd \ufffd ' , 'WORD\ufffd ' ])
167167 self ._assert_encoding (None , b"\xef \xbb \xbf WORD\xe3 \xab " ,
168- 'utf-8' , [u 'WORD\ufffd \ufffd ' , u 'WORD\ufffd ' ])
168+ 'utf-8' , ['WORD\ufffd \ufffd ' , 'WORD\ufffd ' ])
169169
170170 def test_replace_wrong_encoding (self ):
171171 """Test invalid chars are replaced properly"""
172172 encoding , body_unicode = html_to_unicode (ct ('utf-8' ),
173173 b'PREFIX\xe3 \xab SUFFIX' )
174174 # XXX: Policy for replacing invalid chars may suffer minor variations
175- # but it should always contain the unicode replacement char (u '\ufffd')
176- assert u '\ufffd ' in body_unicode , repr (body_unicode )
177- assert u 'PREFIX' in body_unicode , repr (body_unicode )
178- assert u 'SUFFIX' in body_unicode , repr (body_unicode )
175+ # but it should always contain the unicode replacement char ('\ufffd')
176+ assert '\ufffd ' in body_unicode , repr (body_unicode )
177+ assert 'PREFIX' in body_unicode , repr (body_unicode )
178+ assert 'SUFFIX' in body_unicode , repr (body_unicode )
179179
180180 # Do not destroy html tags due to encoding bugs
181181 encoding , body_unicode = html_to_unicode (ct ('utf-8' ),
182182 b'\xf0 <span>value</span>' )
183- assert u '<span>value</span>' in body_unicode , repr (body_unicode )
183+ assert '<span>value</span>' in body_unicode , repr (body_unicode )
184184
185185 def _assert_encoding_detected (self , content_type , expected_encoding , body ,
186186 ** kwargs ):
@@ -193,39 +193,39 @@ def test_BOM(self):
193193 # utf-16 cases already tested, as is the BOM detection function
194194
195195 # http header takes precedence, irrespective of BOM
196- bom_be_str = codecs .BOM_UTF16_BE + u "hi" .encode ('utf-16-be' )
197- expected = u '\ufffd \ufffd \x00 h\x00 i'
196+ bom_be_str = codecs .BOM_UTF16_BE + "hi" .encode ('utf-16-be' )
197+ expected = '\ufffd \ufffd \x00 h\x00 i'
198198 self ._assert_encoding ('utf-8' , bom_be_str , 'utf-8' , expected )
199199
200200 # BOM is stripped when it agrees with the encoding, or used to
201201 # determine encoding
202202 bom_utf8_str = codecs .BOM_UTF8 + b'hi'
203- self ._assert_encoding ('utf-8' , bom_utf8_str , 'utf-8' , u "hi" )
204- self ._assert_encoding (None , bom_utf8_str , 'utf-8' , u "hi" )
203+ self ._assert_encoding ('utf-8' , bom_utf8_str , 'utf-8' , "hi" )
204+ self ._assert_encoding (None , bom_utf8_str , 'utf-8' , "hi" )
205205
206206 def test_utf16_32 (self ):
207207 # tools.ietf.org/html/rfc2781 section 4.3
208208
209209 # USE BOM and strip it
210- bom_be_str = codecs .BOM_UTF16_BE + u "hi" .encode ('utf-16-be' )
211- self ._assert_encoding ('utf-16' , bom_be_str , 'utf-16-be' , u "hi" )
212- self ._assert_encoding (None , bom_be_str , 'utf-16-be' , u "hi" )
210+ bom_be_str = codecs .BOM_UTF16_BE + "hi" .encode ('utf-16-be' )
211+ self ._assert_encoding ('utf-16' , bom_be_str , 'utf-16-be' , "hi" )
212+ self ._assert_encoding (None , bom_be_str , 'utf-16-be' , "hi" )
213213
214- bom_le_str = codecs .BOM_UTF16_LE + u "hi" .encode ('utf-16-le' )
215- self ._assert_encoding ('utf-16' , bom_le_str , 'utf-16-le' , u "hi" )
216- self ._assert_encoding (None , bom_le_str , 'utf-16-le' , u "hi" )
214+ bom_le_str = codecs .BOM_UTF16_LE + "hi" .encode ('utf-16-le' )
215+ self ._assert_encoding ('utf-16' , bom_le_str , 'utf-16-le' , "hi" )
216+ self ._assert_encoding (None , bom_le_str , 'utf-16-le' , "hi" )
217217
218- bom_be_str = codecs .BOM_UTF32_BE + u "hi" .encode ('utf-32-be' )
219- self ._assert_encoding ('utf-32' , bom_be_str , 'utf-32-be' , u "hi" )
220- self ._assert_encoding (None , bom_be_str , 'utf-32-be' , u "hi" )
218+ bom_be_str = codecs .BOM_UTF32_BE + "hi" .encode ('utf-32-be' )
219+ self ._assert_encoding ('utf-32' , bom_be_str , 'utf-32-be' , "hi" )
220+ self ._assert_encoding (None , bom_be_str , 'utf-32-be' , "hi" )
221221
222- bom_le_str = codecs .BOM_UTF32_LE + u "hi" .encode ('utf-32-le' )
223- self ._assert_encoding ('utf-32' , bom_le_str , 'utf-32-le' , u "hi" )
224- self ._assert_encoding (None , bom_le_str , 'utf-32-le' , u "hi" )
222+ bom_le_str = codecs .BOM_UTF32_LE + "hi" .encode ('utf-32-le' )
223+ self ._assert_encoding ('utf-32' , bom_le_str , 'utf-32-le' , "hi" )
224+ self ._assert_encoding (None , bom_le_str , 'utf-32-le' , "hi" )
225225
226226 # if there is no BOM, big endian should be chosen
227- self ._assert_encoding ('utf-16' , u "hi" .encode ('utf-16-be' ), 'utf-16-be' , u "hi" )
228- self ._assert_encoding ('utf-32' , u "hi" .encode ('utf-32-be' ), 'utf-32-be' , u "hi" )
227+ self ._assert_encoding ('utf-16' , "hi" .encode ('utf-16-be' ), 'utf-16-be' , "hi" )
228+ self ._assert_encoding ('utf-32' , "hi" .encode ('utf-32-be' ), 'utf-32-be' , "hi" )
229229
230230 def test_python_crash (self ):
231231 import random
0 commit comments