1414 from numpy .typing import NDArray
1515
1616
17- def debug_compare_raw_vs_processed (segy_file , trace_index = 0 ):
18- """Debug function to compare raw filesystem data vs processed data."""
19- from segy .indexing import HeaderIndexer
20-
21- # Create a fresh indexer to get raw data
22- indexer = HeaderIndexer (
23- segy_file .fs ,
24- segy_file .url ,
25- segy_file .spec .trace ,
26- segy_file .num_traces ,
27- transform_pipeline = None # No transforms = raw data
28- )
29-
30- # Get raw data directly from filesystem
31- raw_data = indexer [trace_index ]
32-
33- # Get processed data with transforms
34- processed_data = segy_file .header [trace_index ]
35-
36- return raw_data , processed_data
37-
38-
3917class HeaderRawTransformedAccessor :
4018 """Utility class to access both raw and transformed header data with single filesystem read.
4119
@@ -57,28 +35,7 @@ def __init__(self, segy_file: SegyFile):
5735 segy_file: The SegyFile instance to work with
5836 """
5937 self .segy_file = segy_file
60- self .header_indexer = segy_file .header
61- self .transform_pipeline = self .header_indexer .transform_pipeline
62-
63- def get_raw_and_transformed (
64- self , indices : int | list [int ] | np .ndarray | slice
65- ) -> tuple [NDArray , NDArray ]:
66- """Get both raw and transformed header data with single filesystem read.
67-
68- Args:
69- indices: Which headers to retrieve (int, list, ndarray, or slice)
70-
71- Returns:
72- Tuple of (raw_headers, transformed_headers)
73- """
74- # Get the transformed data using the normal API
75- # This reads from filesystem and applies transforms
76- transformed_data = self .header_indexer [indices ]
77-
78- # Now reverse the transforms to get back to raw data
79- raw_data = self ._reverse_transforms (transformed_data )
80-
81- return raw_data , transformed_data
38+ self .transform_pipeline = self .segy_file .header .transform_pipeline
8239
8340 def _reverse_transforms (self , transformed_data : NDArray ) -> NDArray :
8441 """Reverse the transform pipeline to get raw data from transformed data.
@@ -95,52 +52,51 @@ def _reverse_transforms(self, transformed_data: NDArray) -> NDArray:
9552
9653 # Apply transforms in reverse order with reversed operations
9754 for i , transform in enumerate (reversed (self .transform_pipeline .transforms )):
98- raw_data = self . _reverse_single_transform (raw_data , transform )
55+ raw_data = _reverse_single_transform (raw_data , transform )
9956
10057 return raw_data
10158
def _reverse_single_transform(data: NDArray, transform: Transform) -> NDArray:
    """Undo one pipeline transform by applying its opposite to ``data``.

    Args:
        data: The data to reverse transform.
        transform: The forward transform whose effect should be undone.

    Returns:
        The result of applying the opposite transform. ``data`` is returned
        unchanged when ``transform`` is neither a ``ByteSwapTransform`` nor
        an ``IbmFloatTransform``.
    """
    if isinstance(transform, ByteSwapTransform):
        # Imported here rather than at module level to avoid circular imports.
        from segy.schema import Endianness

        # The reverse target is simply the opposite of the forward target.
        # TODO: the original author doubted this is correct.
        # NOTE(review): flipping the target presumably only restores the raw
        # bytes when the forward transform actually swapped them; if the data
        # was already in ``target_order`` the forward step may have been a
        # no-op -- confirm against ByteSwapTransform.apply.
        if transform.target_order == Endianness.LITTLE:
            reverse_target = Endianness.BIG
        else:
            reverse_target = Endianness.LITTLE

        return ByteSwapTransform(reverse_target).apply(data)

    if isinstance(transform, IbmFloatTransform):
        # Swap the conversion direction, keeping the same keys.
        reverse_direction = "to_ibm" if transform.direction == "to_ieee" else "to_ieee"
        return IbmFloatTransform(reverse_direction, transform.keys).apply(data)

    # Unknown transforms are passed through untouched so that newly added
    # transform types do not break the reversal.
    return data
144100
145101
146102def get_header_raw_and_transformed (
@@ -171,5 +127,53 @@ def get_header_raw_and_transformed(
171127 # Slice of headers
172128 raw_hdrs, transformed_hdrs = get_header_raw_and_transformed(segy_file, slice(0, 10))
173129 """
174- accessor = HeaderRawTransformedAccessor (segy_file )
175- return accessor .get_raw_and_transformed (indices )
130+ return _get_header_raw_optimized (segy_file , indices )
131+
def _get_header_raw_optimized(
    segy_file: SegyFile,
    indices: int | list[int] | np.ndarray | slice,
) -> tuple[NDArray, NDArray]:
    """Return raw and transformed headers from a single header lookup.

    The transformed headers are fetched once through ``segy_file.header``;
    the raw headers are then derived from that result by reversing the
    header transform pipeline, instead of indexing the file a second time.

    Args:
        segy_file: The SegyFile instance.
        indices: Which headers to retrieve.

    Returns:
        Tuple of ``(raw_headers, transformed_headers)`` where
        ``transformed_headers`` is exactly ``segy_file.header[indices]``.
    """
    header = segy_file.header

    # The only header lookup performed by this function.
    transformed_headers = header[indices]

    # Rebuild the raw view from the data that is already in memory.
    raw_headers = _reverse_transforms(transformed_headers, header.transform_pipeline)

    return raw_headers, transformed_headers
160+
def _reverse_transforms(transformed_data: NDArray, transform_pipeline) -> NDArray:
    """Reverse the transform pipeline to get raw data from transformed data.

    Args:
        transformed_data: Data that has been processed through the transform
            pipeline. It is copied first when it exposes a ``copy`` method.
        transform_pipeline: The pipeline to reverse; its ``transforms``
            sequence is walked from last to first.

    Returns:
        The data after every transform in the pipeline has been reversed.
        With an empty pipeline this is just the copy of the input (or the
        input itself when it has no ``copy`` method).
    """
    # Work on a copy when one is available; objects without ``copy`` are
    # used as-is.
    if hasattr(transformed_data, "copy"):
        raw_data = transformed_data.copy()
    else:
        raw_data = transformed_data

    # Undo the transforms in the opposite order to which they were applied.
    for transform in reversed(transform_pipeline.transforms):
        raw_data = _reverse_single_transform(raw_data, transform)

    return raw_data
0 commit comments