disk connector tweaks: kernel read hints (fadvise/madvise/MAP_POPULATE) and memoryview hand-off to decoders

joocer · joocer · commit 2bcd505c4182 · 2025-10-19T21:22:17.000+01:00
diff --git a/opteryx/__version__.py b/opteryx/__version__.py
@@ -1,9 +1,9 @@
 # THIS FILE IS AUTOMATICALLY UPDATED DURING THE BUILD PROCESS
 # DO NOT EDIT THIS FILE DIRECTLY
 
-__build__ = 1673
+__build__ = 1674
 __author__ = "@joocer"
-__version__ = "0.26.0-beta.1673"
+__version__ = "0.26.0-beta.1674"
 
 # Store the version here so:
 # 1) we don't load dependencies by storing it in __init__.py
diff --git a/opteryx/compiled/structures/lru_k.pyx b/opteryx/compiled/structures/lru_k.pyx
@@ -177,7 +177,6 @@ cdef class LRU_K:
         """Evict one item using simplified LRU-K algorithm."""
         cdef bytes candidate_key = None
         cdef bytes candidate_value = None
-        cdef int64_t oldest_kth_time = -1
         cdef int64_t kth_time
         cdef list history
 
diff --git a/opteryx/connectors/disk_connector.py b/opteryx/connectors/disk_connector.py
@@ -8,8 +8,11 @@
 given as a folder on local disk
 """
 
+import contextlib
+import ctypes
 import mmap
 import os
+import platform
 import time
 from typing import Dict
 from typing import List
@@ -34,6 +37,7 @@
 
 OS_SEP = os.sep
 IS_WINDOWS = is_windows()
+IS_LINUX = platform.system() == "Linux"
 
 # Define os.O_BINARY for non-Windows platforms if it's not already defined
 if not hasattr(os, "O_BINARY"):
@@ -43,7 +47,12 @@
 
 mmap_config = {}
 if not IS_WINDOWS:
-    mmap_config["flags"] = mmap.MAP_PRIVATE
+    # prefer MAP_PRIVATE and on Linux enable MAP_POPULATE to fault pages in
+    flags = mmap.MAP_PRIVATE
+    if IS_LINUX and hasattr(mmap, "MAP_POPULATE"):
+        with contextlib.suppress(Exception):
+            flags |= mmap.MAP_POPULATE
+    mmap_config["flags"] = flags
     mmap_config["prot"] = mmap.PROT_READ
 else:
     mmap_config["access"] = mmap.ACCESS_READ
@@ -129,14 +138,42 @@ def read_blob(
             OSError:
                 If an I/O error occurs while reading the file.
         """
+        file_descriptor = None
+        _map = None
         try:
             file_descriptor = os.open(blob_name, os.O_RDONLY | os.O_BINARY)
+            # on platforms that support it give the kernel a hint about access pattern
             if hasattr(os, "posix_fadvise"):
-                os.posix_fadvise(file_descriptor, 0, 0, os.POSIX_FADV_WILLNEED)
+                # sequential access is the common pattern for dataset reads
+                try:
+                    os.posix_fadvise(file_descriptor, 0, 0, os.POSIX_FADV_SEQUENTIAL)
+                except OSError:
+                    # fall back to WILLNEED if the SEQUENTIAL hint raises OSError
+                    with contextlib.suppress(Exception):
+                        os.posix_fadvise(file_descriptor, 0, 0, os.POSIX_FADV_WILLNEED)
+
             size = os.fstat(file_descriptor).st_size
             _map = mmap.mmap(file_descriptor, length=size, **mmap_config)
+
+            # On Linux advise the kernel that access will be sequential to improve readahead
+            if IS_LINUX:
+                try:
+                    libc = ctypes.CDLL("libc.so.6")
+                    # MADV_SEQUENTIAL is 2 on Linux; hardcoded here as it is passed straight to libc
+                    MADV_SEQUENTIAL = 2
+                    addr = ctypes.c_void_p(ctypes.addressof(ctypes.c_char.from_buffer(_map)))
+                    length = ctypes.c_size_t(size)
+                    libc.madvise(addr, length, MADV_SEQUENTIAL)
+                except Exception:
+                    # best-effort: if anything goes wrong, ignore
+                    pass
+
+            # pass a memoryview of the mmap to decoders - this makes intent explicit
+            # and lets decoders that can accept memoryviews avoid extra copies
+            buffer = memoryview(_map)
+
             result = decoder(
-                _map,
+                buffer,
                 just_schema=just_schema,
                 projection=projection,
                 selection=selection,
@@ -146,14 +183,20 @@ def read_blob(
 
             if not just_schema:
                 stats = self.read_blob_statistics(
-                    blob_name=blob_name, blob_bytes=_map, decoder=decoder
+                    blob_name=blob_name, blob_bytes=buffer, decoder=decoder
                 )
                 if self.relation_statistics is None:
                     self.relation_statistics = stats
 
             return result
         finally:
-            os.close(file_descriptor)
+            # Best-effort: close the mmap before the file descriptor; failures are suppressed
+            with contextlib.suppress(Exception):
+                if _map is not None:
+                    _map.close()
+            with contextlib.suppress(Exception):
+                if file_descriptor is not None:
+                    os.close(file_descriptor)
 
     @single_item_cache
     def get_list_of_blob_names(self, *, prefix: str) -> List[str]:
diff --git a/opteryx/utils/__init__.py b/opteryx/utils/__init__.py
@@ -8,9 +8,12 @@
 from typing import Iterable
 from typing import Optional
 
+from orso.tools import single_item_cache
+
 from opteryx.third_party.mbleven import compare
 
 
+@single_item_cache
 def is_windows() -> bool:
     return platform.system().lower() == "windows"
 
diff --git a/opteryx/utils/file_decoders.py b/opteryx/utils/file_decoders.py
@@ -181,6 +181,7 @@ def zstd_decoder(
     else:
         stream = buffer
 
+    # zstandard.open returns a file-like object, which we pass directly to jsonl_decoder
     with zstandard.open(stream, "rb") as file:
         return jsonl_decoder(
             file, projection=projection, selection=selection, just_schema=just_schema
@@ -318,9 +319,17 @@ def parquet_decoder(
 
     # If we're here, we can't use rugo - we need to read the file with pyarrow
 
-    # Open the parquet file only once
+    # Open the parquet file only once. Prefer pyarrow.BufferReader with a
+    # pyarrow.Buffer when we have a memoryview to avoid creating intermediate
+    # Python bytes objects.
     if isinstance(buffer, memoryview):
-        stream = MemoryViewStream(buffer)
+        # pyarrow.py_buffer accepts buffer-protocol objects and is zero-copy
+        try:
+            pa_buf = pyarrow.py_buffer(buffer)
+            stream = pyarrow.BufferReader(pa_buf)
+        except Exception:
+            # fall back to MemoryViewStream if pyarrow can't handle this memoryview
+            stream = MemoryViewStream(buffer)
     elif isinstance(buffer, bytes):
         stream = pyarrow.BufferReader(buffer)
     else:
@@ -444,10 +453,12 @@ def jsonl_decoder(
 
     from opteryx.third_party.tktech import csimdjson as simdjson
 
+    # Normalize inputs: accept memoryview, bytes, or file-like objects.
     if isinstance(buffer, memoryview):
-        # If it's a memoryview, we need to convert it to bytes
+        # Convert to bytes once; many downstream codepaths expect a bytes object
         buffer = buffer.tobytes()
-    if not isinstance(buffer, bytes):
+    elif not isinstance(buffer, bytes) and hasattr(buffer, "read"):
+        # file-like: read once into memory
         buffer = buffer.read()
 
     # If it's COUNT(*), we don't need to create a full dataset
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "opteryx"
-version = "0.26.0-beta.1673"
+version = "0.26.0-beta.1674"
 description = "Query your data, where it lives"
 requires-python = '>=3.11'
 readme = {file = "README.md", content-type = "text/markdown"}