|  | 
|  | 1 | +#!/usr/bin/env python3 | 
|  | 2 | +""" | 
|  | 3 | +Quick reference examples for the disk_reader module. | 
|  | 4 | +""" | 
|  | 5 | + | 
|  | 6 | +from opteryx.compiled.io.disk_reader import read_file | 
|  | 7 | +from opteryx.compiled.io.disk_reader import read_file_to_bytes | 
|  | 8 | + | 
|  | 9 | + | 
|  | 10 | +# Example 1: Basic file reading | 
def example_basic():
    """Read a file, report its size, and print a short preview of its contents.

    The result of ``read_file`` is treated as a memoryview, so the preview is
    produced by slicing the view first and converting only that slice.
    """
    data = read_file("temp.json")  # Returns memoryview
    print(f"File size: {len(data)} bytes")

    # Convert to bytes if needed. Slice the view *before* converting so only
    # the 50-byte preview is copied; bytes(data)[:50] would copy the whole
    # file first and then throw almost all of it away.
    preview = bytes(data[:50])
    print(f"First 50 chars: {preview}")
|  | 19 | + | 
|  | 20 | + | 
|  | 21 | +# Example 2: Stream large files without cache pollution | 
def example_streaming():
    """Walk through several large files, printing the size of each one."""
    for path in ("planets-gw0.duckdb", "planets-gw1.duckdb"):
        # drop_after=True: read the file, then evict it from cache to save memory
        contents = read_file(path, drop_after=True)
        size = len(contents)
        print(f"{path}: {size:,} bytes")
|  | 30 | + | 
|  | 31 | + | 
|  | 32 | +# Example 3: Zero-copy operations with memoryview | 
def example_zero_copy():
    """Extract the first line of a file without copying the file's contents.

    The newline search runs directly on the buffer returned by ``read_file``
    and the first line is taken as a slice of that buffer; only the final
    ``bytes(first_line)`` used for printing copies data, and only that line.
    """
    import re  # local import: only this example needs it

    data = read_file("temp.csv")

    # re accepts bytes-like objects, so the search works on the memoryview
    # itself. Calling data.tobytes() here would copy the entire file, which
    # is exactly what this example is meant to avoid.
    newline = re.search(b"\n", data)

    # Without a newline the whole buffer is the first line. A find()-style
    # result of -1 used as a slice bound would silently drop the last byte.
    end = newline.start() if newline else len(data)

    # Slicing a memoryview yields another view over the same memory.
    first_line = data[:end]

    print(f"First line: {bytes(first_line)}")
|  | 41 | + | 
|  | 42 | + | 
|  | 43 | +# Example 4: Using read_file_to_bytes for convenience | 
def example_bytes():
    """Fetch a file's contents as bytes rather than as a memoryview."""
    contents = read_file_to_bytes("temp.md")

    # bytes methods such as split() can be called on the result directly
    line_count = len(contents.split(b'\n'))
    print(f"Number of lines: {line_count}")
|  | 51 | + | 
|  | 52 | + | 
|  | 53 | +# Example 5: I/O hints for optimal performance | 
def example_io_hints():
    """Demonstrate the keyword hints that steer caching for different scenarios."""
    scenarios = (
        # Large sequential reads (optimal)
        ("large_file.bin", {"sequential": True, "willneed": True}),
        # Random access patterns
        ("index_file.bin", {"sequential": False}),
        # One-time processing of huge files
        ("temporary_data.bin", {"drop_after": True}),
    )

    for path, hints in scenarios:
        # Same as writing the hints out as keyword arguments on the call
        data = read_file(path, **hints)
|  | 65 | + | 
|  | 66 | + | 
if __name__ == "__main__":
    import sys

    rule = "=" * 60
    print("disk_reader Quick Examples")
    print(rule)

    try:
        # Heading/function pairs, run in order. The table is built inside the
        # try so that any failure is reported through the handler below.
        demos = (
            ("\n1. Basic Reading:", example_basic),
            ("\n2. Streaming Large Files:", example_streaming),
            ("\n3. Zero-Copy Operations:", example_zero_copy),
            ("\n4. Bytes Convenience Method:", example_bytes),
        )
        for heading, demo in demos:
            print(heading)
            demo()

        print("\n" + rule)
        print("✓ All examples completed successfully!")

    except Exception as e:
        # Top-level boundary: report the failure and exit with a non-zero status
        print(f"\nError: {e}")
        sys.exit(1)
0 commit comments