|  | 
|  | 1 | +# cython: language_level=3 | 
|  | 2 | +# cython: nonecheck=False | 
|  | 3 | +# cython: cdivision=True | 
|  | 4 | +# cython: initializedcheck=False | 
|  | 5 | +# cython: infer_types=True | 
|  | 6 | +# cython: wraparound=False | 
|  | 7 | +# cython: boundscheck=False | 
|  | 8 | + | 
|  | 9 | +""" | 
|  | 10 | +Ultra-fast disk reader module | 
|  | 11 | +""" | 
|  | 12 | + | 
|  | 13 | +from cpython.buffer cimport PyBuffer_FillInfo | 
|  | 14 | +from libc.stdlib cimport free | 
|  | 15 | + | 
# Native I/O helpers declared in "disk_io.h".
# NOTE(review): the return-code convention is not visible in this file; the
# callers below treat 0 as success and pass the negated code to OSError as errno.
cdef extern from "disk_io.h":
    # Fills the caller-supplied buffer `dst` with the contents of `path` and
    # reports the byte count through `out_len`. The three flags are forwarded
    # unchanged from read_file(); presumably page-cache hints -- confirm in disk_io.h.
    int read_all_pread(
        const char* path,
        unsigned char* dst,
        size_t* out_len,
        bint sequential,
        bint willneed,
        bint drop_after
    )

    # Hands back a pointer to the file contents through `dst` and its length
    # through `out_len`; the result is later released with unmap_memory_c().
    int read_all_mmap(
        const char* path,
        unsigned char** dst,
        size_t* out_len
    )

    # Releases a region of `size` bytes starting at `addr`.
    int unmap_memory_c(
        unsigned char* addr,
        size_t size
    )
|  | 21 | + | 
cdef class MappedMemory:
    """
    Read-only buffer-protocol wrapper around a raw block of native memory.

    Instances are created at C level (``MappedMemory.__new__``) and then have
    their fields filled in directly; the fields are not visible from Python.

    The block is released exactly once when the wrapper is destroyed:
    ``free()`` for malloc'd memory (``owned`` is true), ``unmap_memory_c()``
    otherwise.  Buffers exported through ``__getbuffer__`` hold a reference to
    the wrapper, so destruction cannot happen while a memoryview is alive.
    """
    # Start of the memory block; NULL when nothing is attached or it was released.
    cdef unsigned char* data
    # Length of the block in bytes.
    cdef size_t size
    # True when `data` came from malloc and must be released with free().
    # NOTE(review): non-owned memory is assumed to come from read_all_mmap(),
    # as in read_file_mmap(); confirm no other producer exists.
    cdef bint owned

    def __dealloc__(self):
        if self.data == NULL:
            # Nothing attached, or already released explicitly.
            return
        if self.owned:
            free(self.data)
        else:
            # Previously a forgotten mapping leaked for the life of the process.
            unmap_memory_c(self.data, self.size)
        self.data = NULL
        self.size = 0

    def __getbuffer__(self, Py_buffer* buffer, int flags):
        # The literal 1 is the `readonly` argument: consumers get a
        # non-writable, contiguous byte view of the block.
        PyBuffer_FillInfo(buffer, self, self.data, self.size, 1, flags)

    def __len__(self):
        """Return the size of the wrapped block in bytes."""
        return self.size
|  | 37 | + | 
|  | 38 | + | 
def read_file(str path, bint sequential=True, bint willneed=True, bint drop_after=False):
    """
    Read an entire file into memory with optimized I/O.

    Args:
        path: Path of the file to read.
        sequential, willneed, drop_after: Flags forwarded unchanged to the
            native ``read_all_pread`` helper.

    Returns:
        A ``memoryview`` over a freshly allocated ``bytearray``, trimmed to
        the number of bytes the native helper reported.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        OSError: If the native read returns a non-zero code; the negated
            code is used as ``errno``.
    """
    cdef size_t size
    cdef size_t out_len = 0
    cdef const char* c_path
    cdef unsigned char[::1] buf_view
    cdef unsigned char* dst
    cdef int rc

    import os

    if not os.path.exists(path):
        raise FileNotFoundError(f"File not found: {path}")

    size = os.path.getsize(path)

    # Encode with the filesystem encoding and error handler rather than a
    # hard-coded UTF-8, so names that os.path.exists() accepted (for example
    # surrogate-escaped, non-UTF-8 names) also reach the C layer intact.
    path_b = os.fsencode(path)
    c_path = path_b  # borrowed pointer; path_b stays alive until return

    # Mutable destination buffer sized from the stat result above.
    # NOTE(review): the capacity is not passed to read_all_pread(); if the
    # file grows between getsize() and the read, whether the helper can
    # overrun `buf` depends on disk_io.h -- confirm.
    buf = bytearray(size)
    buf_view = buf
    dst = &buf_view[0]

    rc = read_all_pread(c_path, dst, &out_len, sequential, willneed, drop_after)
    if rc != 0:
        raise OSError(-rc, f"Failed to read file: {path}")

    # Expose only the bytes actually read.
    return memoryview(buf)[:out_len]
|  | 65 | + | 
|  | 66 | + | 
def read_file_mmap(str path):
    """
    Map a file into memory and return it wrapped in a ``MappedMemory``.

    The returned object supports the buffer protocol (``memoryview(obj)``)
    and ``len()``.  Pass it to ``unmap_memory`` when finished so the mapping
    is released promptly.

    Args:
        path: Path of the file to map.

    Returns:
        A ``MappedMemory`` whose block was produced by ``read_all_mmap``.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        OSError: If the native mapping call returns a non-zero code; the
            negated code is used as ``errno``.
    """
    cdef const char* c_path
    cdef unsigned char* mapped_data = NULL
    cdef size_t size = 0
    cdef int rc
    cdef MappedMemory wrapper

    import os

    if not os.path.exists(path):
        raise FileNotFoundError(f"File not found: {path}")

    # Use the filesystem encoding and error handler instead of a hard-coded
    # UTF-8 so every name the os module accepts is passed to C unchanged.
    path_b = os.fsencode(path)
    c_path = path_b  # borrowed pointer; path_b stays alive until return

    rc = read_all_mmap(c_path, &mapped_data, &size)
    if rc != 0:
        raise OSError(-rc, f"Failed to mmap file: {path}")

    # Build the wrapper without running __init__ and attach the mapping.
    wrapper = MappedMemory.__new__(MappedMemory)
    wrapper.data = mapped_data
    wrapper.size = size
    wrapper.owned = False  # mmap'd memory: must not be released with free()

    return wrapper
|  | 94 | + | 
|  | 95 | + | 
def read_file_to_bytes(str path, bint sequential=True, bint willneed=True, bint drop_after=False):
    """
    Return the full contents of ``path`` as an immutable ``bytes`` object.

    Delegates to ``read_file`` with the same flags, then copies the view it
    returns into a new ``bytes`` instance.
    """
    return bytes(read_file(path, sequential, willneed, drop_after))
|  | 102 | + | 
|  | 103 | + | 
def unmap_memory(mem_obj):
    """
    Explicitly release the mapping held by an object from ``read_file_mmap``.

    Any memoryview previously taken from ``mem_obj`` points at the released
    region afterwards and must not be used.

    Args:
        mem_obj: The ``MappedMemory`` to release.  Any other object, or one
            with nothing left to release, is ignored.

    Returns:
        ``True`` when there was nothing to do or the native unmap call
        returned 0; ``False`` when it reported a failure (the object is left
        unchanged in that case).
    """
    cdef MappedMemory mapped
    cdef int rc

    # The cdef fields of MappedMemory are invisible to Python attribute
    # lookup, so the old hasattr(mem_obj, 'data') test was always False and
    # nothing was ever unmapped.  Check the type and use the C fields directly.
    if not isinstance(mem_obj, MappedMemory):
        return True
    mapped = <MappedMemory>mem_obj

    # malloc-owned blocks are released by the wrapper itself, not unmapped.
    if mapped.data == NULL or mapped.owned:
        return True

    rc = unmap_memory_c(mapped.data, mapped.size)
    if rc != 0:
        return False

    # Clear the fields so the region cannot be released a second time.
    mapped.data = NULL
    mapped.size = 0
    return True
0 commit comments