|
| 1 | +# cython: language_level=3 |
| 2 | +# cython: boundscheck=False |
| 3 | +# cython: wraparound=False |
| 4 | +# cython: cdivision=True |
| 5 | +# cython: initializedcheck=False |
| 6 | +# cython: nonecheck=False |
| 7 | + |
| 8 | +""" |
| 9 | +Cython-optimized MemoryViewStream for high-performance memoryview reading. |
| 10 | +""" |
| 11 | + |
| 12 | +import io |
| 13 | + |
cdef class MemoryViewStream:
    """
    Read-only, seekable stream over a one-dimensional byte buffer.

    The wrapped object is accessed through a typed memoryview, so data is
    only copied when ``read()`` has to build a ``bytes`` result.
    """
    cdef:
        const unsigned char[:] mv  # Typed view of the wrapped buffer
        Py_ssize_t offset  # Current read position, 0 <= offset <= _len
        bint _closed  # Set by close(); guards the I/O methods
        Py_ssize_t _len  # Number of bytes exposed by ``mv``
        object _underlying_bytes  # Backing bytes object, or None

    def __init__(self, object mv):
        """
        Wrap ``mv`` as a stream positioned at offset 0.

        Args:
            mv: Object exposing a one-dimensional unsigned-byte buffer,
                typically a ``memoryview``.
        """
        self.mv = mv
        self.offset = 0
        self._closed = False
        self._len = self.mv.shape[0]
        self._underlying_bytes = None

        if isinstance(mv, bytes):
            self._underlying_bytes = mv
            return

        # ``memoryview.obj`` is the *base* object even when the view is a
        # slice of it, so the bytes fast path is only valid when the view
        # covers the whole base object in order. A C-contiguous view whose
        # byte count equals the base length can only start at index 0.
        base = getattr(mv, "obj", None)
        if (
            isinstance(base, bytes)
            and getattr(mv, "c_contiguous", False)
            and getattr(mv, "nbytes", -1) == len(base)
        ):
            self._underlying_bytes = base

    cpdef read(self, Py_ssize_t n=-1):
        """
        Read and return up to ``n`` bytes.

        Args:
            n: Maximum number of bytes to return; a negative value reads
                everything up to the end of the buffer.

        Returns:
            A ``bytes`` object, empty when the position is at the end.

        Raises:
            ValueError: If the stream is closed.
        """
        cdef:
            Py_ssize_t offset
            Py_ssize_t remaining
            Py_ssize_t bytes_to_read

        if self._closed:
            raise ValueError("I/O operation on closed file.")

        offset = self.offset
        remaining = self._len - offset
        if remaining <= 0:
            return b""

        # Compare against the remaining count instead of computing
        # ``offset + n``, which overflows for very large ``n``.
        if n < 0 or n > remaining:
            bytes_to_read = remaining
        else:
            bytes_to_read = n

        if self._underlying_bytes is not None:
            # Slicing the backing bytes object skips the buffer-protocol
            # round trip; the slice itself is still a new bytes object.
            result = self._underlying_bytes[offset : offset + bytes_to_read]
            self.offset = offset + bytes_to_read
            return result

        result = bytes(self.mv[offset : offset + bytes_to_read])
        self.offset = offset + bytes_to_read
        return result

    cpdef Py_ssize_t readinto(self, bytearray b) except -1:
        """
        Copy bytes from the current position into ``b``.

        Args:
            b: Destination bytearray; at most ``len(b)`` bytes are copied.

        Returns:
            The number of bytes copied (0 at the end of the buffer).

        Raises:
            ValueError: If the stream is closed.
            TypeError: If ``b`` is None.
        """
        cdef:
            Py_ssize_t start
            Py_ssize_t bytes_available
            Py_ssize_t bytes_to_read
            Py_ssize_t capacity
            Py_ssize_t i
            unsigned char[:] b_view

        if self._closed:
            raise ValueError("I/O operation on closed file.")
        # None checks are disabled for this module, so reject None
        # explicitly before touching the argument.
        if b is None:
            raise TypeError("readinto() argument must be a bytearray, not None")

        start = self.offset
        bytes_available = self._len - start
        if bytes_available <= 0:
            return 0

        b_view = b
        capacity = b_view.shape[0]
        bytes_to_read = capacity if capacity < bytes_available else bytes_available

        for i in range(bytes_to_read):
            b_view[i] = self.mv[start + i]

        self.offset = start + bytes_to_read
        return bytes_to_read

    cpdef read1(self, Py_ssize_t n=-1):
        """Read and return up to n bytes (same as read for this implementation)."""
        return self.read(n)

    cpdef Py_ssize_t seek(self, Py_ssize_t offset, int whence=0) except -1:
        """
        Change the stream position.

        Args:
            offset: Displacement relative to the position chosen by
                ``whence``.
            whence: 0 for the start, 1 for the current position, 2 for the
                end of the buffer.

        Returns:
            The new absolute position, clamped to ``[0, len(self)]``.

        Raises:
            ValueError: If the stream is closed or ``whence`` is invalid.
        """
        cdef:
            Py_ssize_t base
            Py_ssize_t new_offset

        if self._closed:
            raise ValueError("I/O operation on closed file.")

        if whence == 0:  # SEEK_SET
            base = 0
        elif whence == 1:  # SEEK_CUR
            base = self.offset
        elif whence == 2:  # SEEK_END
            base = self._len
        else:
            raise ValueError(f"Invalid value for whence: {whence}")

        # Clamp to the valid range. ``base`` lies in [0, _len], so both
        # comparisons are overflow-free and ``base + offset`` is only
        # evaluated when the result is known to fit.
        if offset <= -base:
            new_offset = 0
        elif offset >= self._len - base:
            new_offset = self._len
        else:
            new_offset = base + offset

        self.offset = new_offset
        return new_offset

    cpdef Py_ssize_t tell(self):
        """Return current stream position."""
        return self.offset

    def readable(self):
        """Return whether object supports reading."""
        return True

    def writable(self):
        """Return whether object supports writing."""
        return False

    def seekable(self):
        """Return whether object supports random access."""
        return True

    cpdef close(self):
        """Close the stream."""
        self._closed = True

    @property
    def closed(self):
        """Return whether the stream is closed."""
        return self._closed

    @property
    def mode(self):
        """Return the mode of the stream."""
        return "rb"

    def __len__(self):
        """Return the length of the underlying buffer."""
        return self._len

    def __enter__(self):
        """Context manager entry."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit."""
        self.close()

    def __iter__(self):
        """Return an iterator over the memoryview."""
        return self

    def __next__(self):
        """
        Return the next byte as a length-1 ``bytes`` object.

        Raises:
            ValueError: If the stream is closed.
            StopIteration: When the position has reached the end.
        """
        cdef unsigned char byte

        if self._closed:
            raise ValueError("I/O operation on closed file.")
        if self.offset >= self._len:
            raise StopIteration()

        byte = self.mv[self.offset]
        self.offset += 1
        return bytes([byte])

    def fileno(self):
        """Return file descriptor (not supported)."""
        return -1

    def flush(self):
        """Flush write buffers (not supported)."""
        raise io.UnsupportedOperation()

    def isatty(self):
        """Return whether this is an interactive stream."""
        return False

    def readline(self, limit=-1):
        """Read and return a line (not supported)."""
        raise io.UnsupportedOperation()

    def readlines(self, hint=-1):
        """Read and return a list of lines (not supported)."""
        raise io.UnsupportedOperation()

    def truncate(self, size=None):
        """Truncate file to size (not supported)."""
        raise io.UnsupportedOperation()

    def write(self, s):
        """Write string to file (not supported)."""
        raise io.UnsupportedOperation()

    def writelines(self, lines):
        """Write a list of lines to stream (not supported)."""
        raise io.UnsupportedOperation()
| 207 | + |
| 208 | +# Additional high-performance subclass with zero-copy helper methods |
cdef class MemoryViewStreamOptimized(MemoryViewStream):
    """
    MemoryViewStream variant that exchanges data as memoryviews.

    Adds a read that returns a slice of the wrapped buffer and a
    ``readinto`` that accepts any writable unsigned-byte memoryview.
    """

    cpdef const unsigned char[:] read_memoryview(self, Py_ssize_t n=-1):
        """
        Read up to ``n`` bytes as a memoryview slice instead of bytes.

        The returned view is a slice of the wrapped buffer, so it reflects
        later changes to that buffer.

        Args:
            n: Maximum number of bytes to expose; a negative value reads
                everything up to the end of the buffer.

        Returns:
            A read-only view of the bytes read; empty at the end of the
            buffer.

        Raises:
            ValueError: If the stream is closed.
        """
        cdef:
            Py_ssize_t offset
            Py_ssize_t remaining
            Py_ssize_t bytes_to_read

        if self._closed:
            raise ValueError("I/O operation on closed file.")

        offset = self.offset
        remaining = self._len - offset
        if remaining <= 0:
            return self.mv[0:0]  # Empty memoryview

        # Compare against the remaining count instead of computing
        # ``offset + n``, which overflows for very large ``n``.
        if n < 0 or n > remaining:
            bytes_to_read = remaining
        else:
            bytes_to_read = n

        self.offset = offset + bytes_to_read
        return self.mv[offset : offset + bytes_to_read]

    cpdef Py_ssize_t readinto_memoryview(self, unsigned char[:] buffer) except -1:
        """
        Copy bytes from the current position into ``buffer``.

        Args:
            buffer: Writable one-dimensional unsigned-byte memoryview; at
                most ``buffer.shape[0]`` bytes are copied.

        Returns:
            The number of bytes copied (0 at the end of the buffer).

        Raises:
            ValueError: If the stream is closed.
            TypeError: If ``buffer`` is None.
        """
        cdef:
            Py_ssize_t start
            Py_ssize_t capacity
            Py_ssize_t bytes_available
            Py_ssize_t bytes_to_read
            Py_ssize_t i

        if self._closed:
            raise ValueError("I/O operation on closed file.")
        # Initialization checks are disabled for this module, so a None
        # argument must be rejected before its shape is read.
        if buffer is None:
            raise TypeError("readinto_memoryview() argument must not be None")

        start = self.offset
        bytes_available = self._len - start
        if bytes_available <= 0:
            return 0

        capacity = buffer.shape[0]
        bytes_to_read = capacity if capacity < bytes_available else bytes_available

        for i in range(bytes_to_read):
            buffer[i] = self.mv[start + i]

        self.offset = start + bytes_to_read
        return bytes_to_read
0 commit comments