@@ -44,8 +44,9 @@ def text_encoding(encoding, stacklevel=2):
4444 """
4545 A helper function to choose the text encoding.
4646
47- When encoding is not None, just return it.
48- Otherwise, return the default text encoding (i.e. "locale").
47+ When encoding is not None, this function returns it.
48+ Otherwise, this function returns the default text encoding
49+ (i.e. "locale" or "utf-8", depending on UTF-8 mode).
4950
5051 This function emits an EncodingWarning if *encoding* is None and
5152 sys.flags.warn_default_encoding is true.
@@ -55,7 +56,10 @@ def text_encoding(encoding, stacklevel=2):
5556 However, please consider using encoding="utf-8" for new APIs.
5657 """
5758 if encoding is None :
58- encoding = "locale"
59+ if sys .flags .utf8_mode :
60+ encoding = "utf-8"
61+ else :
62+ encoding = "locale"
5963 if sys .flags .warn_default_encoding :
6064 import warnings
6165 warnings .warn ("'encoding' argument not specified." ,
@@ -101,7 +105,6 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
101105 'b' binary mode
102106 't' text mode (default)
103107 '+' open a disk file for updating (reading and writing)
104- 'U' universal newline mode (deprecated)
105108 ========= ===============================================================
106109
107110 The default mode is 'rt' (open for reading text). For binary random
@@ -117,10 +120,6 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
117120 returned as strings, the bytes having been first decoded using a
118121 platform-dependent encoding or using the specified encoding if given.
119122
120- 'U' mode is deprecated and will raise an exception in future versions
121- of Python. It has no effect in Python 3. Use newline to control
122- universal newlines mode.
123-
124123 buffering is an optional integer used to set the buffering policy.
125124 Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
126125 line buffering (only usable in text mode), and an integer > 1 to indicate
@@ -206,7 +205,7 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
206205 if errors is not None and not isinstance (errors , str ):
207206 raise TypeError ("invalid errors: %r" % errors )
208207 modes = set (mode )
209- if modes - set ("axrwb+tU " ) or len (mode ) > len (modes ):
208+ if modes - set ("axrwb+t " ) or len (mode ) > len (modes ):
210209 raise ValueError ("invalid mode: %r" % mode )
211210 creating = "x" in modes
212211 reading = "r" in modes
@@ -215,13 +214,6 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
215214 updating = "+" in modes
216215 text = "t" in modes
217216 binary = "b" in modes
218- if "U" in modes :
219- if creating or writing or appending or updating :
220- raise ValueError ("mode U cannot be combined with 'x', 'w', 'a', or '+'" )
221- import warnings
222- warnings .warn ("'U' mode is deprecated" ,
223- DeprecationWarning , 2 )
224- reading = True
225217 if text and binary :
226218 raise ValueError ("can't have text and binary mode at once" )
227219 if creating + reading + writing + appending > 1 :
@@ -311,22 +303,6 @@ def _open_code_with_warning(path):
311303 open_code = _open_code_with_warning
312304
313305
314- def __getattr__ (name ):
315- if name == "OpenWrapper" :
316- # bpo-43680: Until Python 3.9, _pyio.open was not a static method and
317- # builtins.open was set to OpenWrapper to not become a bound method
318- # when set to a class variable. _io.open is a built-in function whereas
319- # _pyio.open is a Python function. In Python 3.10, _pyio.open() is now
320- # a static method, and builtins.open() is now io.open().
321- import warnings
322- warnings .warn ('OpenWrapper is deprecated, use open instead' ,
323- DeprecationWarning , stacklevel = 2 )
324- global OpenWrapper
325- OpenWrapper = open
326- return OpenWrapper
327- raise AttributeError (name )
328-
329-
330306# In normal operation, both `UnsupportedOperation`s should be bound to the
331307# same object.
332308try :
@@ -338,8 +314,7 @@ class UnsupportedOperation(OSError, ValueError):
338314
339315class IOBase (metaclass = abc .ABCMeta ):
340316
341- """The abstract base class for all I/O classes, acting on streams of
342- bytes. There is no public constructor.
317+ """The abstract base class for all I/O classes.
343318
344319 This class provides dummy implementations for many methods that
345320 derived classes can override selectively; the default implementations
@@ -1154,6 +1129,7 @@ def peek(self, size=0):
11541129 do at most one raw read to satisfy it. We never return more
11551130 than self.buffer_size.
11561131 """
1132+ self ._checkClosed ("peek of closed file" )
11571133 with self ._read_lock :
11581134 return self ._peek_unlocked (size )
11591135
@@ -1172,6 +1148,7 @@ def read1(self, size=-1):
11721148 """Reads up to size bytes, with at most one read() system call."""
11731149 # Returns up to size bytes. If at least one byte is buffered, we
11741150 # only return buffered bytes. Otherwise, we do one raw read.
1151+ self ._checkClosed ("read of closed file" )
11751152 if size < 0 :
11761153 size = self .buffer_size
11771154 if size == 0 :
@@ -1189,6 +1166,8 @@ def read1(self, size=-1):
11891166 def _readinto (self , buf , read1 ):
11901167 """Read data into *buf* with at most one system call."""
11911168
1169+ self ._checkClosed ("readinto of closed file" )
1170+
11921171 # Need to create a memoryview object of type 'b', otherwise
11931172 # we may not be able to assign bytes to it, and slicing it
11941173 # would create a new object.
@@ -1233,11 +1212,13 @@ def _readinto(self, buf, read1):
12331212 return written
12341213
12351214 def tell (self ):
1236- return _BufferedIOMixin .tell (self ) - len (self ._read_buf ) + self ._read_pos
1215+ # GH-95782: Keep return value non-negative
1216+ return max (_BufferedIOMixin .tell (self ) - len (self ._read_buf ) + self ._read_pos , 0 )
12371217
12381218 def seek (self , pos , whence = 0 ):
12391219 if whence not in valid_seek_flags :
12401220 raise ValueError ("invalid whence value" )
1221+ self ._checkClosed ("seek of closed file" )
12411222 with self ._read_lock :
12421223 if whence == 1 :
12431224 pos -= len (self ._read_buf ) - self ._read_pos
@@ -1845,7 +1826,7 @@ class TextIOBase(IOBase):
18451826 """Base class for text I/O.
18461827
18471828 This class provides a character and line based interface to stream
1848- I/O. There is no public constructor.
1829+ I/O.
18491830 """
18501831
18511832 def read (self , size = - 1 ):
@@ -1997,7 +1978,7 @@ class TextIOWrapper(TextIOBase):
19971978 r"""Character and line based layer over a BufferedIOBase object, buffer.
19981979
19991980 encoding gives the name of the encoding that the stream will be
2000- decoded or encoded with. It defaults to locale.getpreferredencoding(False ).
1981+ decoded or encoded with. It defaults to locale.getencoding( ).
20011982
20021983 errors determines the strictness of encoding and decoding (see the
20031984 codecs.register) and defaults to "strict".
@@ -2031,19 +2012,7 @@ def __init__(self, buffer, encoding=None, errors=None, newline=None,
20312012 encoding = text_encoding (encoding )
20322013
20332014 if encoding == "locale" :
2034- try :
2035- encoding = os .device_encoding (buffer .fileno ()) or "locale"
2036- except (AttributeError , UnsupportedOperation ):
2037- pass
2038-
2039- if encoding == "locale" :
2040- try :
2041- import locale
2042- except ImportError :
2043- # Importing locale may fail if Python is being built
2044- encoding = "utf-8"
2045- else :
2046- encoding = locale .getpreferredencoding (False )
2015+ encoding = self ._get_locale_encoding ()
20472016
20482017 if not isinstance (encoding , str ):
20492018 raise ValueError ("invalid encoding: %r" % encoding )
@@ -2176,6 +2145,8 @@ def reconfigure(self, *,
21762145 else :
21772146 if not isinstance (encoding , str ):
21782147 raise TypeError ("invalid encoding: %r" % encoding )
2148+ if encoding == "locale" :
2149+ encoding = self ._get_locale_encoding ()
21792150
21802151 if newline is Ellipsis :
21812152 newline = self ._readnl
@@ -2243,8 +2214,9 @@ def write(self, s):
22432214 self .buffer .write (b )
22442215 if self ._line_buffering and (haslf or "\r " in s ):
22452216 self .flush ()
2246- self ._set_decoded_chars ('' )
2247- self ._snapshot = None
2217+ if self ._snapshot is not None :
2218+ self ._set_decoded_chars ('' )
2219+ self ._snapshot = None
22482220 if self ._decoder :
22492221 self ._decoder .reset ()
22502222 return length
@@ -2280,6 +2252,15 @@ def _get_decoded_chars(self, n=None):
22802252 self ._decoded_chars_used += len (chars )
22812253 return chars
22822254
2255+ def _get_locale_encoding (self ):
2256+ try :
2257+ import locale
2258+ except ImportError :
2259+ # Importing locale may fail if Python is being built
2260+ return "utf-8"
2261+ else :
2262+ return locale .getencoding ()
2263+
22832264 def _rewind_decoded_chars (self , n ):
22842265 """Rewind the _decoded_chars buffer."""
22852266 if self ._decoded_chars_used < n :
@@ -2549,8 +2530,9 @@ def read(self, size=None):
25492530 # Read everything.
25502531 result = (self ._get_decoded_chars () +
25512532 decoder .decode (self .buffer .read (), final = True ))
2552- self ._set_decoded_chars ('' )
2553- self ._snapshot = None
2533+ if self ._snapshot is not None :
2534+ self ._set_decoded_chars ('' )
2535+ self ._snapshot = None
25542536 return result
25552537 else :
25562538 # Keep reading chunks until we have size characters to return.
0 commit comments