|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Quick reference examples for the disk_reader module. |
| 4 | +""" |
| 5 | + |
| 6 | +from opteryx.compiled.io.disk_reader import read_file |
| 7 | +from opteryx.compiled.io.disk_reader import read_file_to_bytes |
| 8 | + |
| 9 | + |
| 10 | +# Example 1: Basic file reading |
def example_basic():
    """Load ``temp.json`` and print its size followed by a short preview."""
    view = read_file("temp.json")  # read_file hands back a memoryview
    size = len(view)
    print(f"File size: {size} bytes")

    # Materialise the view as a bytes object when bytes are required
    as_bytes = bytes(view)
    preview = as_bytes[:50]
    print(f"First 50 chars: {preview}")
| 19 | + |
| 20 | + |
| 21 | +# Example 2: Stream large files without cache pollution |
def example_streaming():
    """Read several large files one after another and report each size."""
    for path in ("planets-gw0.duckdb", "planets-gw1.duckdb"):
        # drop_after=True: read the file, then evict it from cache to save memory
        contents = read_file(path, drop_after=True)
        byte_count = len(contents)
        print(f"{path}: {byte_count:,} bytes")
| 30 | + |
| 31 | + |
| 32 | +# Example 3: Zero-copy operations with memoryview |
def example_zero_copy():
    """Slice the first line out of ``temp.csv`` without copying the file.

    The newline is located by scanning the memoryview in place, and the
    first line is taken as a memoryview slice, so only the printed line is
    ever copied into a bytes object.  When the file contains no newline the
    whole content is treated as the first line.
    """
    import re  # local import: only this example needs it

    data = read_file("temp.csv")

    # re searches any bytes-like object through the buffer protocol, so the
    # scan does not duplicate the file the way data.tobytes().find() would.
    newline = re.search(b"\n", data)

    # No newline -> the first line is the entire buffer.  (A bare find()
    # result of -1 used as a slice bound would silently drop the last byte.)
    end = newline.start() if newline else len(data)

    # Slicing a memoryview yields another view onto the same memory
    first_line = data[:end]

    print(f"First line: {bytes(first_line)}")
| 41 | + |
| 42 | + |
| 43 | +# Example 4: Using read_file_to_bytes for convenience |
def example_bytes():
    """Fetch ``temp.md`` as a bytes object and print how many lines it has."""
    raw = read_file_to_bytes("temp.md")

    # The result is used as bytes directly, so split() needs no conversion
    line_count = len(raw.split(b"\n"))
    print(f"Number of lines: {line_count}")
| 51 | + |
| 52 | + |
| 53 | +# Example 5: I/O hints for optimal performance |
def example_io_hints():
    """Show the keyword hints read_file accepts for different access patterns.

    Each call rebinds the same local, so only the most recent buffer is
    kept referenced at any time.
    """
    # Large file read front to back: sequential access plus a will-need hint
    buf = read_file("large_file.bin", sequential=True, willneed=True)

    # File that will be accessed at random offsets: turn the sequential hint off
    buf = read_file("index_file.bin", sequential=False)

    # Huge file processed exactly once: drop it from cache after the read
    buf = read_file("temporary_data.bin", drop_after=True)
| 65 | + |
| 66 | + |
if __name__ == "__main__":
    import sys

    rule = "=" * 60

    # Heading/function pairs, run in order. example_io_hints is not part of
    # this list, matching the set of examples this script has always run.
    demos = (
        ("\n1. Basic Reading:", example_basic),
        ("\n2. Streaming Large Files:", example_streaming),
        ("\n3. Zero-Copy Operations:", example_zero_copy),
        ("\n4. Bytes Convenience Method:", example_bytes),
    )

    print("disk_reader Quick Examples")
    print(rule)

    try:
        for heading, demo in demos:
            print(heading)
            demo()

        print("\n" + rule)
        print("✓ All examples completed successfully!")

    except Exception as exc:
        # Top-level boundary: report the failure and signal it via exit status
        print(f"\nError: {exc}")
        sys.exit(1)
0 commit comments