Skip to content

Commit 5cfb598

Browse files
committed
add Python implementation
1 parent 73dc1c6 commit 5cfb598

File tree

1 file changed

+249
-1
lines changed

1 file changed

+249
-1
lines changed

Lib/_pyio.py

Lines changed: 249 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -551,10 +551,50 @@ def nreadahead():
551551
break
552552
return bytes(res)
553553

554+
def backreadline(self, size=-1):
    r"""Read backwards and return one line of bytes from the stream.

    Bytes are fetched one at a time with backread(1) until a b'\n' has
    been consumed, backread() returns an empty/false value (beginning
    of the stream), or size bytes have been collected.  The collected
    bytes are returned in forward order, so a consumed b'\n' is the
    first byte of the result.

    If size is specified and is an integer, at most size bytes will be
    read and only the last size bytes of the current line will be returned.

    For instance, using 'backreadline(4)' on 'first line\nsecond line'
    would return 'line' (and not 'enil' as opposed to 'backread(4)').

    The line terminator is always b'\n' for binary files; for text
    files, the newlines argument to open can be used to select the
    line terminator(s) to be recognized.

    Raises TypeError if size is neither None nor an object that
    provides __index__().
    """
    if size is None:
        size = -1
    else:
        try:
            size_index = size.__index__
        except AttributeError:
            raise TypeError(f"{size!r} is not an integer")
        else:
            size = size_index()
    # Bytes arrive last-to-first: accumulate them in arrival order and
    # flip the buffer once at the end.
    rev_res, count = bytearray(), 0
    while size < 0 or count < size:
        b = self.backread(1)
        if not b:
            break
        rev_res += b
        count += 1
        # The accumulator is 'rev_res'; the previous code tested an
        # undefined name here and failed after the first byte.
        if rev_res.endswith(b"\n"):
            break
    # reverse the characters in the line
    return bytes(reversed(rev_res))
587+
554588
def __iter__(self):
    """Return the stream itself as its own iterator.

    self._checkClosed() is called first; presumably it raises when the
    stream is closed -- confirm against its definition.
    """
    self._checkClosed()
    return self
557591

592+
def __reversed__(self):
    """Yield lines obtained from backreadline() until it returns nothing.

    This is a generator: self._checkClosed() only runs when the first
    item is requested, not when reversed() is called.  Iteration ends
    as soon as backreadline() returns an empty (false) value.
    """
    self._checkClosed()
    while line := self.backreadline():
        yield line
    # Falling off the end finishes the generator.  An explicit
    # 'raise StopIteration' here would be converted into RuntimeError
    # by the interpreter (PEP 479).
597+
558598
def __next__(self):
559599
line = self.readline()
560600
if not line:
@@ -623,6 +663,26 @@ def read(self, size=-1):
623663
del b[n:]
624664
return bytes(b)
625665

666+
def backread(self, size=-1):
    """Read backwards and return up to size bytes, where size is an int.

    A size of None or any negative value delegates to backreadall().
    Otherwise a scratch buffer of size bytes is filled through
    backreadinto() and trimmed to the number of bytes reported.

    Returns an empty bytes object on BOF, or None if the object is
    set not to block and has no data to read.

    The returned bytes are given in the reversed order they are read,
    e.g., reading the 4 last bytes of 'abc-def' returns 'fed-'.
    """
    if size is None or size < 0:
        return self.backreadall()
    scratch = bytearray(size.__index__())
    filled = self.backreadinto(scratch)
    if filled is None:
        # Nothing available right now; pass the None through unchanged.
        return None
    return bytes(scratch[:filled])
685+
626686
def readall(self):
627687
"""Read until EOF, using multiple read() call."""
628688
res = bytearray()
@@ -634,6 +694,17 @@ def readall(self):
634694
# b'' or None
635695
return data
636696

697+
def backreadall(self):
    """Read until BOF, using multiple backread() calls.

    Chunks of DEFAULT_BUFFER_SIZE bytes are requested until backread()
    returns a false value.  If anything was collected it is returned
    as bytes; otherwise that final false value (b'' or None) is
    returned as-is.
    """
    collected = bytearray()
    while True:
        data = self.backread(DEFAULT_BUFFER_SIZE)
        if not data:
            break
        collected += data
    # When nothing was collected, 'data' is b'' or None.
    return bytes(collected) if collected else data
707+
637708
def readinto(self, b):
638709
"""Read bytes into a pre-allocated bytes-like object b.
639710
@@ -642,6 +713,17 @@ def readinto(self, b):
642713
"""
643714
self._unsupported("readinto")
644715

716+
def backreadinto(self, b):
    """Read bytes backwards into a pre-allocated bytes-like object b.

    Returns an int representing the number of bytes read (0 for BOF), or
    None if the object is set not to block and has no data to read.

    For instance, back-reading 'abc-def' into a bytearray of length 3
    sets the content of the latter to 'fed' and returns 3.

    This base version only calls self._unsupported("backreadinto");
    presumably that raises an "unsupported operation" error -- confirm
    against its definition.
    """
    self._unsupported("backreadinto")
726+
645727
def write(self, b):
646728
"""Write the given buffer to the IO stream.
647729
@@ -692,6 +774,26 @@ def read(self, size=-1):
692774
"""
693775
self._unsupported("read")
694776

777+
def backread(self, size=-1):
    """Read from the end and return up to size bytes, where size is an int.

    If the argument is omitted, None, or negative, reads and
    returns all data until BOF (beginning of file).

    If the argument is positive, and the underlying raw stream is
    not 'interactive', multiple raw reads may be issued to satisfy
    the byte count (unless BOF is reached first). But for
    interactive raw streams (XXX and for pipes?), at most one raw
    read will be issued, and a short result does not imply that
    BOF is imminent.

    Returns an empty bytes array on BOF.

    Raises BlockingIOError if the underlying raw stream has no
    data at the moment.

    This base version only calls self._unsupported("backread");
    the contract above is for subclasses to implement.
    """
    self._unsupported("backread")
796+
695797
def read1(self, size=-1):
696798
"""Read up to size bytes with at most one read() system call,
697799
where size is an int.
@@ -723,6 +825,15 @@ def readinto1(self, b):
723825

724826
return self._readinto(b, read1=True)
725827

828+
def backreadinto(self, b):
    """Back-read into the bytes-like object b and return the byte count.

    b is viewed as a flat buffer of unsigned bytes, backread() is asked
    for as many bytes as b can hold, and whatever comes back is copied
    to the front of b.
    """
    view = b if isinstance(b, memoryview) else memoryview(b)
    view = view.cast('B')
    data = self.backread(len(view))
    n = len(data)
    view[:n] = data
    return n
836+
726837
def _readinto(self, b, read1):
727838
if not isinstance(b, memoryview):
728839
b = memoryview(b)
@@ -928,6 +1039,28 @@ def read(self, size=-1):
9281039
self._pos = newpos
9291040
return bytes(b)
9301041

1042+
def backread(self, size=-1):
    """Read backwards from the current position and return the bytes.

    Up to size bytes located immediately before the current position
    are returned in reversed order, and the position is moved back by
    the number of bytes consumed.  If size is None or negative,
    everything between the start of the buffer and the current
    position is returned and the position becomes 0.

    Returns b'' when the position is already at the beginning.

    Raises ValueError if the object is closed, and TypeError if size
    is neither None nor an object that provides __index__().
    """
    if self.closed:
        raise ValueError("read from closed file")
    # Validate size before any early return so that a bad argument is
    # reported regardless of the current position.
    if size is None:
        size = -1
    else:
        try:
            size_index = size.__index__
        except AttributeError:
            raise TypeError(f"{size!r} is not an integer")
        else:
            size = size_index()
    # BOF for a backward read is position 0.  The forward-read test
    # 'len(buffer) <= pos' would return nothing exactly when positioned
    # at the end of the data, i.e. where backward reads normally start.
    if self._pos <= 0:
        return b''
    if size < 0:
        n = 0
    else:
        n = max(0, self._pos - size)
    # Only the bytes before the current position are eligible, also in
    # the read-everything case.
    b = self._buffer[n : self._pos]
    self._pos = n
    return bytes(reversed(b))
1063+
9311064
def read1(self, size=-1):
9321065
"""This is the same as read.
9331066
"""
@@ -1107,6 +1240,71 @@ def _read_unlocked(self, n=None):
11071240
self._read_pos = 0
11081241
return out[:n] if out else nodata_val
11091242

1243+
def backread(self, size=-1):
    """Validate size, then perform the backward read under the read lock.

    Calls self._checkClosed() with a backread-specific message, rejects
    any size smaller than -1 with ValueError, and otherwise returns
    whatever self._backread_unlocked(size) produces while
    self._read_lock is held.  None is passed through untouched.
    """
    self._checkClosed("backread of closed file")
    size_is_invalid = size is not None and size < -1
    if size_is_invalid:
        raise ValueError("invalid number of bytes to read")
    with self._read_lock:
        result = self._backread_unlocked(size)
    return result
1249+
1250+
def _backread_unlocked(self, n=None):
    """Backward-read up to n bytes; the caller is expected to hold the lock.

    With n None or -1, the read buffer is reset and data is pulled from
    the raw stream until it reports BOF (b'') or that it would block
    (None).  With any other n, the stream is first positioned at its
    end via self.seek(0, 2) and chunks are requested from
    self.raw.backread() until at least n bytes were obtained or the
    raw stream returns b''/None; the result is truncated to n bytes.

    Returns the bytes in the order they were back-read, or the raw
    stream's final b''/None when nothing at all was obtained.
    """
    nodata_val = b""
    empty_values = (b"", None)

    if n is None or n == -1:
        buf, pos = self._read_buf, self._read_pos
        self._reset_read_buf()

        # NOTE(review): this slice is buf[:pos + 1] reversed, i.e. it
        # includes the byte *at* pos; confirm whether buf[:pos] reversed
        # was intended.
        head = buf[pos::-1]
        if hasattr(self.raw, 'backreadall'):
            chunk = self.raw.backreadall()
            if chunk is None:
                return head or None
            return head + chunk
        elif hasattr(self.raw, 'readall'):
            # NOTE(review): this fallback reads *forward* to EOF and
            # reverses the result; confirm this is the intended
            # emulation for raw streams without backreadall().
            chunk = self.raw.readall()
            if chunk is None:
                return head or None
            return head + chunk[::-1]
        chunks = [head]
        while True:
            # Read until BOF or until backread() would block.
            chunk = self.raw.backread()
            if chunk in empty_values:
                nodata_val = chunk
                break
            chunks.append(chunk)
        return b"".join(chunks) or nodata_val

    # Note: it is possible to further optimize this routine by first
    # checking whether we are already at the end of the file or not.
    # If so, we could just return the cached buffer but this requires
    # a call to peek(). For now, we will not implement that approach
    # and simply reset the cache and move the cursor to the end.
    self.seek(0, 2)

    # Split the data into chunks from the right and read
    # them one by one, until encountering BOF or after 'n'
    # bytes were read.
    chunks = []
    count = 0
    chunk_size = min(self.buffer_size, n)
    while count < n:
        # The raw-stream backward-read primitive is backread().
        chunk = self.raw.backread(chunk_size)
        if chunk in empty_values:
            # We read everything in backward order and we could have cached
            # the reversed result to speed-up future calls to forward reads
            # but backread() is mostly used to reduce the memory complexity
            # of read(), so we do not cache it.
            nodata_val = chunk
            break
        count += len(chunk)
        chunks.append(chunk)

    out = b"".join(chunks)
    self._reset_read_buf()
    # The last chunk may overshoot n; trim the surplus.
    return out[:n] if out else nodata_val
1307+
11101308
def peek(self, size=0):
11111309
"""Returns buffered bytes without advancing the position.
11121310
@@ -1351,9 +1549,17 @@ def read(self, size=-1):
13511549
size = -1
13521550
return self.reader.read(size)
13531551

1552+
def backread(self, size=-1):
    """Delegate a backward read to the reader half of the pair.

    A size of None is translated to -1 before being forwarded to
    self.reader.backread().
    """
    return self.reader.backread(-1 if size is None else size)
1556+
13541557
def readinto(self, b):
    """Forward readinto(b) to self.reader and return its result."""
    return self.reader.readinto(b)
13561559

1560+
def backreadinto(self, b):
    """Forward backreadinto(b) to self.reader and return its result."""
    return self.reader.backreadinto(b)
1562+
13571563
def write(self, b):
    """Forward write(b) to self.writer and return its result."""
    return self.writer.write(b)
13591565

@@ -1438,10 +1644,20 @@ def read(self, size=None):
14381644
self.flush()
14391645
return BufferedReader.read(self, size)
14401646

1647+
def backread(self, size=None):
    """Flush, then perform BufferedReader.backread().

    A size of None is translated to -1 before delegating.
    """
    effective_size = -1 if size is None else size
    self.flush()
    return BufferedReader.backread(self, effective_size)
1652+
14411653
def readinto(self, b):
    """Call self.flush(), then delegate to BufferedReader.readinto()."""
    self.flush()
    return BufferedReader.readinto(self, b)
14441656

1657+
def backreadinto(self, b):
    """Call self.flush(), then delegate to BufferedReader.backreadinto()."""
    self.flush()
    return BufferedReader.backreadinto(self, b)
1660+
14451661
def peek(self, size=0):
    """Call self.flush(), then delegate to BufferedReader.peek()."""
    self.flush()
    return BufferedReader.peek(self, size)
@@ -1689,6 +1905,14 @@ def readinto(self, b):
16891905
m[:n] = data
16901906
return n
16911907

1908+
def backreadinto(self, b):
    """Same as RawIOBase.backreadinto().

    b is viewed as a flat buffer of unsigned bytes and filled from the
    front with the result of self.backread(len(b)).  Returns the number
    of bytes stored, or None when backread() itself returned None.
    """
    # NOTE(review): this relies on self.backread() not being implemented
    # in terms of backreadinto(); confirm this class has its own
    # backread(), otherwise the two methods would call each other.
    m = memoryview(b).cast('B')
    data = self.backread(len(m))
    if data is None:
        # No data available right now: propagate None rather than fail
        # on len(None).
        return None
    n = len(data)
    m[:n] = data
    return n
1915+
16921916
def write(self, b):
16931917
"""Write bytes b to file, return number written.
16941918
@@ -1829,6 +2053,17 @@ def read(self, size=-1):
18292053
"""
18302054
self._unsupported("read")
18312055

2056+
def backread(self, size=-1):
    """Read backwards at most size characters from stream,
    where size is an int.

    Read from underlying buffer until we have size characters or we hit BOF.
    If size is negative or omitted, read until BOF.

    Returns a string.

    This base version only calls self._unsupported("backread").
    """
    self._unsupported("backread")
2066+
18322067
def write(self, s):
    """Write string s to the stream and return an int.

    This base version only calls self._unsupported("write").
    """
    self._unsupported("write")
@@ -1837,13 +2072,20 @@ def truncate(self, pos=None):
18372072
"""Truncate size to pos, where pos is an int."""
18382073
self._unsupported("truncate")
18392074

1840-
def readline(self):
2075+
def readline(self, size=None):
    """Read until newline or EOF.

    Returns an empty string if EOF is hit immediately.

    The size argument is accepted but not used by this base version,
    which only calls self._unsupported("readline").
    """
    self._unsupported("readline")
18462081

2082+
def backreadline(self, size=None):
    """Read backwards until newline or BOF.

    Returns an empty string if BOF is hit immediately.

    The size argument is accepted but not used by this base version,
    which only calls self._unsupported("backreadline").
    """
    self._unsupported("backreadline")
2088+
18472089
def detach(self):
18482090
"""
18492091
Separate the underlying buffer from the TextIOBase and return it.
@@ -2533,6 +2775,9 @@ def read(self, size=None):
25332775
result += self._get_decoded_chars(size - len(result))
25342776
return result
25352777

2778+
def backread(self, size=None):
    """Backward reads are not implemented here.

    Ignores size and only calls self._unsupported("backread").
    """
    self._unsupported("backread")
2780+
25362781
def __next__(self):
25372782
self._telling = False
25382783
line = self.readline()
@@ -2635,6 +2880,9 @@ def readline(self, size=None):
26352880
self._rewind_decoded_chars(len(line) - endpos)
26362881
return line[:endpos]
26372882

2883+
def backreadline(self, size=None):
    """Backward line reads are not implemented here.

    Ignores size and only calls self._unsupported("backreadline").
    """
    self._unsupported("backreadline")
2885+
26382886
@property
def newlines(self):
    """Return self._decoder.newlines, or None when self._decoder is falsy."""
    return self._decoder.newlines if self._decoder else None

0 commit comments

Comments
 (0)