88given as a folder on local disk
99"""
1010
11+ import contextlib
12+ import ctypes
1113import mmap
1214import os
15+ import platform
1316import time
1417from typing import Dict
1518from typing import List
3437
3538OS_SEP = os .sep
3639IS_WINDOWS = is_windows ()
40+ IS_LINUX = platform .system () == "Linux"
3741
3842# Define os.O_BINARY for non-Windows platforms if it's not already defined
3943if not hasattr (os , "O_BINARY" ):
4347
# Keyword arguments passed to every ``mmap.mmap`` call made by this module.
# They request a read-only mapping, expressed the way each platform's mmap
# API expects it.
mmap_config = {}
if IS_WINDOWS:
    # The Windows mmap signature takes ``access`` instead of flags/prot.
    mmap_config["access"] = mmap.ACCESS_READ
else:
    # Private read-only mapping; on Linux additionally ask the kernel to
    # pre-fault the pages (MAP_POPULATE) when this Python build exposes the
    # constant. ``getattr`` with a 0 default makes the OR a no-op otherwise,
    # so no exception handling is needed around a plain integer operation.
    flags = mmap.MAP_PRIVATE
    if IS_LINUX:
        flags |= getattr(mmap, "MAP_POPULATE", 0)
    mmap_config["flags"] = flags
    mmap_config["prot"] = mmap.PROT_READ
@@ -129,14 +138,42 @@ def read_blob(
129138 OSError:
130139 If an I/O error occurs while reading the file.
131140 """
141+ file_descriptor = None
142+ _map = None
132143 try :
133144 file_descriptor = os .open (blob_name , os .O_RDONLY | os .O_BINARY )
145+ # on platforms that support it give the kernel a hint about access pattern
134146 if hasattr (os , "posix_fadvise" ):
135- os .posix_fadvise (file_descriptor , 0 , 0 , os .POSIX_FADV_WILLNEED )
147+ # sequential access is the common pattern for dataset reads
148+ try :
149+ os .posix_fadvise (file_descriptor , 0 , 0 , os .POSIX_FADV_SEQUENTIAL )
150+ except OSError :
151+ # fallback to WILLNEED if SEQUENTIAL is not allowed
152+ with contextlib .suppress (Exception ):
153+ os .posix_fadvise (file_descriptor , 0 , 0 , os .POSIX_FADV_WILLNEED )
154+
136155 size = os .fstat (file_descriptor ).st_size
137156 _map = mmap .mmap (file_descriptor , length = size , ** mmap_config )
157+
158+ # On Linux advise the kernel that access will be sequential to improve readahead
159+ if IS_LINUX :
160+ try :
161+ libc = ctypes .CDLL ("libc.so.6" )
162+ # MADV_SEQUENTIAL is 2 on Linux; hardcoded because ctypes cannot read C header macros
163+ MADV_SEQUENTIAL = 2
164+ addr = ctypes .c_void_p (ctypes .addressof (ctypes .c_char .from_buffer (_map )))
165+ length = ctypes .c_size_t (size )
166+ libc .madvise (addr , length , MADV_SEQUENTIAL )
167+ except Exception :
168+ # best-effort: if anything goes wrong, ignore
169+ pass
170+
171+ # pass a memoryview of the mmap to decoders - this makes intent explicit
172+ # and lets decoders that can accept memoryviews avoid extra copies
173+ buffer = memoryview (_map )
174+
138175 result = decoder (
139- _map ,
176+ buffer ,
140177 just_schema = just_schema ,
141178 projection = projection ,
142179 selection = selection ,
@@ -146,14 +183,20 @@ def read_blob(
146183
147184 if not just_schema :
148185 stats = self .read_blob_statistics (
149- blob_name = blob_name , blob_bytes = _map , decoder = decoder
186+ blob_name = blob_name , blob_bytes = buffer , decoder = decoder
150187 )
151188 if self .relation_statistics is None :
152189 self .relation_statistics = stats
153190
154191 return result
155192 finally :
156- os .close (file_descriptor )
193+ # Ensure mmap is closed before closing the file descriptor
194+ with contextlib .suppress (Exception ):
195+ if _map is not None :
196+ _map .close ()
197+ with contextlib .suppress (Exception ):
198+ if file_descriptor is not None :
199+ os .close (file_descriptor )
157200
158201 @single_item_cache
159202 def get_list_of_blob_names (self , * , prefix : str ) -> List [str ]:
0 commit comments