mabel-dev
diff --git a/‎examples/disk_reader_usage.py‎
Lines changed: 91 additions & 0 deletions b/‎examples/disk_reader_usage.py‎
Lines changed: 91 additions & 0 deletions
diff --git a/‎opteryx/__version__.py‎
Lines changed: 2 additions & 2 deletions b/‎opteryx/__version__.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎opteryx/compiled/io/disk_reader.h‎
Lines changed: 40 additions & 0 deletions b/‎opteryx/compiled/io/disk_reader.h‎
Lines changed: 40 additions & 0 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion b/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/cpp/disk_io.cpp‎
Lines changed: 26 additions & 0 deletions b/‎src/cpp/disk_io.cpp‎
Lines changed: 26 additions & 0 deletions
diff --git a/‎tests/compiled/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎tests/compiled/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎tests/compiled/io/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎tests/compiled/io/__init__.py‎
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+"""
+Quick reference examples for the disk_reader module.
+"""
+
+from opteryx.compiled.io.disk_reader import read_file
+from opteryx.compiled.io.disk_reader import read_file_to_bytes
+
+
+# Example 1: Basic file reading
+def example_basic():
+    """Read a file and get its contents."""
+    data = read_file("temp.json")  # Returns memoryview
+    print(f"File size: {len(data)} bytes")
+    
+    # Convert to bytes if needed
+    data_bytes = bytes(data)
+    print(f"First 50 chars: {data_bytes[:50]}")
+
+
+# Example 2: Stream large files without cache pollution
+def example_streaming():
+    """Process multiple large files efficiently."""
+    large_files = ["planets-gw0.duckdb", "planets-gw1.duckdb"]
+    
+    for filename in large_files:
+        # Read and evict from cache to save memory
+        data = read_file(filename, drop_after=True)
+        print(f"{filename}: {len(data):,} bytes")
+
+
+# Example 3: Zero-copy operations with memoryview
+def example_zero_copy():
+    """Efficiently slice data without copying."""
+    data = read_file("temp.csv")
+    
+    # These operations don't copy the underlying data
+    first_line = data[:data.tobytes().find(b'\n')]
+    
+    print(f"First line: {bytes(first_line)}")
+
+
+# Example 4: Using read_file_to_bytes for convenience
+def example_bytes():
+    """Get bytes directly instead of memoryview."""
+    data = read_file_to_bytes("temp.md")
+    
+    # Can use all bytes methods directly
+    lines = data.split(b'\n')
+    print(f"Number of lines: {len(lines)}")
+
+
+# Example 5: I/O hints for optimal performance
+def example_io_hints():
+    """Control caching behavior for different scenarios."""
+    
+    # For large sequential reads (optimal)
+    data = read_file("large_file.bin", sequential=True, willneed=True)
+    
+    # For random access patterns
+    data = read_file("index_file.bin", sequential=False)
+    
+    # For one-time processing of huge files
+    data = read_file("temporary_data.bin", drop_after=True)
+
+
+if __name__ == "__main__":
+    import sys
+    
+    print("disk_reader Quick Examples")
+    print("=" * 60)
+    
+    try:
+        print("\n1. Basic Reading:")
+        example_basic()
+        
+        print("\n2. Streaming Large Files:")
+        example_streaming()
+        
+        print("\n3. Zero-Copy Operations:")
+        example_zero_copy()
+        
+        print("\n4. Bytes Convenience Method:")
+        example_bytes()
+        
+        print("\n" + "=" * 60)
+        print("✓ All examples completed successfully!")
+        
+    except Exception as e:
+        print(f"\nError: {e}")
+        sys.exit(1)
@@ -1,9 +1,9 @@
 # THIS FILE IS AUTOMATICALLY UPDATED DURING THE BUILD PROCESS
 # DO NOT EDIT THIS FILE DIRECTLY
 
-__build__ = 1704
+__build__ = 1705
 __author__ = "@joocer"
-__version__ = "0.26.0-beta.1704"
+__version__ = "0.26.0-beta.1705"
 
 # Store the version here so:
 # 1) we don't load dependencies by storing it in __init__.py
 
@@ -0,0 +1,40 @@
+#ifndef DISK_READER_H
+#define DISK_READER_H
+
+#include <cstddef>
+#include <cstdint>
+
+/**
+ * Fast disk reader with platform-specific I/O optimizations
+ * 
+ * Reads the file at `path` into a caller-supplied buffer.
+ * NOTE(review): the required capacity of `dst` is not stated here;
+ * presumably it must be at least the file size. Confirm against the
+ * implementation.
+ * 
+ * @param path File path to read
+ * @param dst Destination buffer (must be pre-allocated)
+ * @param out_len Output parameter for bytes read
+ * @param sequential Hint for sequential access pattern
+ * @param willneed Hint that data will be needed soon (prefetch)
+ * @param drop_after Drop page cache after reading
+ * @return 0 on success, negative errno on failure
+ */
+int read_all_pread(const char* path, uint8_t* dst, size_t* out_len,
+                   bool sequential, bool willneed, bool drop_after);
+
+/**
+ * Memory-map a file for reading
+ * 
+ * An empty file is not mapped: `*dst` is set to nullptr, `*out_len` is set
+ * to 0 and the call returns 0 (success).
+ * 
+ * @param path File path to map
+ * @param dst Output parameter for mapped memory address
+ * @param out_len Output parameter for file size
+ * @return 0 on success, negative errno on failure
+ */
+int read_all_mmap(const char* path, uint8_t** dst, size_t* out_len);
+
+/**
+ * Unmap memory that was mapped with read_all_mmap
+ * 
+ * NOTE(review): read_all_mmap reports a null address and zero size for an
+ * empty file; confirm callers do not pass that pair here.
+ * 
+ * @param addr Address to unmap
+ * @param size Size of the mapped region
+ * @return 0 on success, negative errno on failure
+ */
+int unmap_memory_c(uint8_t* addr, size_t size);
+
+#endif // DISK_READER_H
@@ -1,6 +1,6 @@
 [project]
 name = "opteryx"
-version = "0.26.0-beta.1704"
+version = "0.26.0-beta.1705"
 description = "Query your data, where it lives"
 requires-python = '>=3.11'
 readme = {file = "README.md", content-type = "text/markdown"}
 
@@ -167,6 +167,15 @@ int read_all_mmap(const char* path, uint8_t** dst, size_t* out_len) {
     }
 
     size_t size = static_cast<size_t>(st.st_size);
+    
+    // Handle empty files - mmap doesn't work with size 0
+    if (size == 0) {
+        close(fd);
+        *dst = nullptr;
+        *out_len = 0;
+        return 0;
+    }
+    
     void* mapped = mmap(NULL, size, PROT_READ, MAP_PRIVATE | MAP_POPULATE, fd, 0);
     close(fd);
 
@@ -191,6 +200,15 @@ int read_all_mmap(const char* path, uint8_t** dst, size_t* out_len) {
     }
 
     size_t size = static_cast<size_t>(st.st_size);
+    
+    // Handle empty files - mmap doesn't work with size 0
+    if (size == 0) {
+        close(fd);
+        *dst = nullptr;
+        *out_len = 0;
+        return 0;
+    }
+    
     void* mapped = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
     close(fd);
 
@@ -214,6 +232,14 @@ int read_all_mmap(const char* path, uint8_t** dst, size_t* out_len) {
     DWORD sizeLow = GetFileSize(hFile, &sizeHigh);
     size_t size = (static_cast<size_t>(sizeHigh) << 32) | sizeLow;
 
+    // Handle empty files
+    if (size == 0) {
+        CloseHandle(hFile);
+        *dst = nullptr;
+        *out_len = 0;
+        return 0;
+    }
+
     HANDLE hMapping = CreateFileMappingA(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
     if (!hMapping) {
         CloseHandle(hFile);
 
@@ -0,0 +1 @@
+# Compiled module tests
@@ -0,0 +1 @@
+# I/O module tests