|
| 1 | +"""Consumer-side utility to get both raw and transformed header data with single filesystem read.""" |
| 2 | + |
| 3 | +from __future__ import annotations |
| 4 | + |
| 5 | +import numpy as np |
| 6 | +from typing import TYPE_CHECKING |
| 7 | +from segy.transforms import ByteSwapTransform |
| 8 | +from segy.transforms import IbmFloatTransform |
| 9 | + |
| 10 | +if TYPE_CHECKING: |
| 11 | + from segy.file import SegyFile |
| 12 | + from segy.indexing import HeaderIndexer |
| 13 | + from segy.transforms import Transform, TransformPipeline, ByteSwapTransform, IbmFloatTransform |
| 14 | + from numpy.typing import NDArray |
| 15 | + |
| 16 | + |
def debug_compare_raw_vs_processed(segy_file, trace_index=0):
    """Print a side-by-side comparison of untransformed and transformed headers.

    A second ``HeaderIndexer`` is built over the same filesystem/URL with
    ``transform_pipeline=None`` and queried at ``trace_index``; the result is
    compared against ``segy_file.header[trace_index]``.

    Args:
        segy_file: Object exposing ``fs``, ``url``, ``spec.trace``,
            ``num_traces`` and ``header`` (presumably a ``SegyFile``).
        trace_index: Index passed unchanged to both indexers.

    Returns:
        Tuple of ``(raw_data, processed_data)``.
    """
    from segy.indexing import HeaderIndexer

    # Separate indexer with no transform pipeline, so nothing is applied to
    # the values it returns.
    untransformed_indexer = HeaderIndexer(
        segy_file.fs,
        segy_file.url,
        segy_file.spec.trace,
        segy_file.num_traces,
        transform_pipeline=None,
    )

    raw_data = untransformed_indexer[trace_index]
    processed_data = segy_file.header[trace_index]

    print("=== Raw vs Processed Comparison ===")
    print(f"Raw data shape: {raw_data.shape}")
    print(f"Processed data shape: {processed_data.shape}")

    field_names = raw_data.dtype.names if hasattr(raw_data, 'dtype') else None
    if field_names and 'inline_number' in field_names:
        raw_inline = raw_data['inline_number']
        print(f"Raw inline_number: {raw_inline}")
        print(f"Raw inline_number (hex): {raw_inline:08x}")

        processed_inline = processed_data['inline_number']
        print(f"Processed inline_number: {processed_inline}")
        print(f"Processed inline_number (hex): {processed_inline:08x}")

        print(f"Are they equal? {raw_data['inline_number'] == processed_data['inline_number']}")

    return raw_data, processed_data
| 49 | + |
| 50 | + |
class HeaderRawTransformedAccessor:
    """Expose both raw and transformed header data from one indexer read.

    The class is a pure consumer of a ``SegyFile``-like object:

    1. headers are fetched once through ``segy_file.header[indices]``
       (which yields the *transformed* values),
    2. the indexer's transform pipeline is walked backwards to reconstruct
       the raw values,
    3. both arrays are returned to the caller.

    Only ``ByteSwapTransform`` and ``IbmFloatTransform`` are reversed; any
    other transform type is reported and skipped, in which case the "raw"
    result is not a faithful reconstruction.

    Diagnostics are printed to stdout while ``verbose`` is true (the default,
    matching the historical behaviour).
    """

    # Class-level default so diagnostics stay on for instances that were
    # created without running __init__.
    verbose: bool = True

    def __init__(self, segy_file: SegyFile, *, verbose: bool = True):
        """Bind to ``segy_file`` and capture its header indexer and pipeline.

        Args:
            segy_file: Object exposing ``header`` (an indexer with a
                ``transform_pipeline``) and ``spec.endianness``.
            verbose: When true, print debug information to stdout.
        """
        self.segy_file = segy_file
        self.header_indexer = segy_file.header
        self.transform_pipeline = self.header_indexer.transform_pipeline
        self.verbose = verbose

        if self.verbose:
            self._print_pipeline_summary()

    # ------------------------------------------------------------------
    # Diagnostics helpers
    # ------------------------------------------------------------------
    def _print_pipeline_summary(self) -> None:
        """Print system/file endianness and one entry per pipeline transform."""
        import sys

        transforms = self.transform_pipeline.transforms
        print(f"Debug: System endianness: {sys.byteorder}")
        print(f"Debug: File endianness: {self.segy_file.spec.endianness}")
        print(f"Debug: Transform pipeline has {len(transforms)} transforms:")
        for i, transform in enumerate(transforms):
            print(f" Transform {i}: {type(transform).__name__}")
            if hasattr(transform, 'target_order'):
                print(f" Target order: {transform.target_order}")
            if hasattr(transform, 'direction'):
                print(f" Direction: {transform.direction}")
            if hasattr(transform, 'keys'):
                print(f" Keys: {transform.keys}")

    def _debug(self, message: str) -> None:
        """Print ``message`` with the ``Debug:`` prefix when verbose."""
        if self.verbose:
            print(f"Debug: {message}")

    @staticmethod
    def _first_inline_number(data):
        """Return the first ``inline_number`` value in ``data``, or ``None``.

        ``None`` is returned when ``data`` is not a structured array/record,
        has no ``inline_number`` field, or holds zero records. Scalars and
        0-d records are handled by promoting the field to one dimension.
        """
        names = getattr(getattr(data, "dtype", None), "names", None)
        if not names or "inline_number" not in names:
            return None
        values = np.atleast_1d(data["inline_number"]).ravel()
        return values[0] if values.size else None

    @staticmethod
    def _as_hex(value) -> str:
        """Format ``value`` as zero-padded hex, falling back to ``repr``."""
        try:
            return format(value, "08x")
        except (TypeError, ValueError):
            # Non-integer field: diagnostics must never abort the read.
            return repr(value)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def get_raw_and_transformed(
        self, indices: int | list[int] | np.ndarray | slice
    ) -> tuple[NDArray, NDArray]:
        """Return ``(raw_headers, transformed_headers)`` for ``indices``.

        The header indexer is queried exactly once; the raw array is derived
        from a copy of the transformed result by reversing the pipeline.

        Args:
            indices: Selection forwarded unchanged to the header indexer.

        Returns:
            Tuple of ``(raw_headers, transformed_headers)``.
        """
        transformed_data = self.header_indexer[indices]

        self._debug(f"Transformed data shape: {transformed_data.shape}")
        names = getattr(getattr(transformed_data, "dtype", None), "names", None)
        if names:
            # Only the first five field names, to keep the line short.
            self._debug(f"Transformed data dtype names: {names[:5]}...")
        first = self._first_inline_number(transformed_data)
        if first is not None:
            self._debug(f"First transformed inline_number: {first}")
            self._debug(f"First transformed inline_number (hex): {self._as_hex(first)}")

        raw_data = self._reverse_transforms(transformed_data)

        self._debug(f"Raw data shape: {raw_data.shape}")
        first = self._first_inline_number(raw_data)
        if first is not None:
            self._debug(f"First raw inline_number: {first}")
            self._debug(f"First raw inline_number (hex): {self._as_hex(first)}")

        return raw_data, transformed_data

    # ------------------------------------------------------------------
    # Pipeline reversal
    # ------------------------------------------------------------------
    def _reverse_transforms(self, transformed_data: NDArray) -> NDArray:
        """Undo the whole pipeline, last transform first.

        Args:
            transformed_data: Output of the header indexer.

        Returns:
            A new object (a copy when ``transformed_data`` supports
            ``copy()``) with every reversible transform undone.
        """
        # Work on a copy so the caller's transformed array is left intact.
        raw_data = transformed_data.copy() if hasattr(transformed_data, 'copy') else transformed_data

        transforms = self.transform_pipeline.transforms
        total = len(transforms)
        self._debug(f"Starting reversal with {total} transforms")

        for offset, transform in enumerate(reversed(transforms)):
            self._debug(f"Reversing transform {total - 1 - offset}: {type(transform).__name__}")
            before = self._first_inline_number(raw_data)
            if before is not None:
                self._debug(f"Before reversal - inline_number: {self._as_hex(before)}")
            raw_data = self._reverse_single_transform(raw_data, transform)
            after = self._first_inline_number(raw_data)
            if after is not None:
                self._debug(f"After reversal - inline_number: {self._as_hex(after)}")

        return raw_data

    def _reverse_single_transform(self, data: NDArray, transform: Transform) -> NDArray:
        """Undo one transform.

        Args:
            data: Data as it looks *after* ``transform`` was applied.
            transform: The pipeline transform to undo.

        Returns:
            The data with ``transform`` reversed, or ``data`` unchanged when
            the transform type is not recognised.
        """
        # Imported lazily to avoid circular imports.
        from segy.transforms import get_endianness
        from segy.schema import Endianness

        if isinstance(transform, ByteSwapTransform):
            self._debug(f"Reversing byte swap (target was: {transform.target_order})")
            if self.verbose:
                self._debug(f"Current data endianness: {get_endianness(data)}")

            # NOTE(review): this assumes the on-disk byte order is the
            # opposite of the forward target. If a pipeline can contain a
            # ByteSwapTransform whose target already equals the file's byte
            # order, the result will not match the on-disk bytes - confirm
            # against how the pipeline is constructed.
            if transform.target_order == Endianness.LITTLE:
                reverse_target = Endianness.BIG
            else:
                reverse_target = Endianness.LITTLE

            self._debug(f"Reversing to target: {reverse_target}")
            result = ByteSwapTransform(reverse_target).apply(data)

            before = self._first_inline_number(data)
            after = self._first_inline_number(result)
            if before is not None and after is not None:
                self._debug(
                    f"Byte swap reversal - before: {self._as_hex(before)}, after: {self._as_hex(after)}"
                )
            return result

        elif isinstance(transform, IbmFloatTransform):
            # Undo by running the same keys in the opposite direction.
            # NOTE(review): whether IEEE <-> IBM round-trips bit-exactly for
            # every value is not established here - confirm if exact raw
            # bytes are required.
            reverse_direction = "to_ibm" if transform.direction == "to_ieee" else "to_ieee"
            self._debug(
                f"Applying IBM float reversal (direction: {transform.direction} -> {reverse_direction})"
            )
            return IbmFloatTransform(reverse_direction, transform.keys).apply(data)

        else:
            # Unknown transform: leave the data untouched but always say so,
            # because the returned "raw" data is then not truly raw.
            print(f"Warning: Unknown transform type {type(transform).__name__}, cannot reverse")
            return data
| 196 | + |
| 197 | + |
def get_header_raw_and_transformed(
    segy_file: SegyFile,
    indices: int | list[int] | np.ndarray | slice
) -> tuple[NDArray, NDArray]:
    """Fetch raw and transformed headers for ``indices`` in one call.

    Builds a throw-away ``HeaderRawTransformedAccessor`` around ``segy_file``
    and delegates to its ``get_raw_and_transformed`` method, so nothing in the
    ``segy`` package itself needs to change.

    Args:
        segy_file: The SegyFile instance to read headers from.
        indices: Which headers to retrieve (int, list, ndarray, or slice).

    Returns:
        Tuple of ``(raw_headers, transformed_headers)``.

    Example:
        from header_raw_transformed_accessor import get_header_raw_and_transformed

        # Single header
        raw_hdr, transformed_hdr = get_header_raw_and_transformed(segy_file, 0)

        # Multiple headers
        raw_hdrs, transformed_hdrs = get_header_raw_and_transformed(segy_file, [0, 1, 2])

        # Slice of headers
        raw_hdrs, transformed_hdrs = get_header_raw_and_transformed(segy_file, slice(0, 10))
    """
    return HeaderRawTransformedAccessor(segy_file).get_raw_and_transformed(indices)
0 commit comments