@@ -659,6 +659,8 @@ def segy_to_mdio_schematized(
659659
660660 ds = MDIO.open(mdio_path_or_buffer)  # Reopen because we needed to do some weird stuff (hacky)
661661
662+ seismic_coords = [name for name in ds.seismic.coords if name not in ds.seismic.dims]
663+
662664 try:
663665 dims = get_dims(ds, segy_schema)
664666 except ValueError as e:
@@ -686,11 +688,8 @@ def segy_to_mdio_schematized(
686688 storage_options_input = storage_options_input or {}
687689 storage_options_output = storage_options_output or {}
688690
689- mdio_spec = (
690- mdio_segy_spec()
691- ) # TODO: I think this may need to be updated to work with our new input schemas
691+ mdio_spec = mdio_segy_spec()
692692 segy_settings = SegySettings(storage_options=storage_options_input)
693- # segy = SegyFile(url=segy_path, spec=mdio_spec, settings=segy_settings)
694693 segy = SegyFile(url=segy_schema["path"], spec=mdio_spec, settings=segy_settings)
695694
696695 text_header = segy.text_header
@@ -701,30 +700,49 @@ def segy_to_mdio_schematized(
701700 index_fields = []
702701 for name, byte, format_ in zip(index_names, index_bytes, index_types, strict=True):
703702 index_fields.append(HeaderField(name=name, byte=byte, format=format_))
703+
704+ # Add the non-dimension coordinates as pseudo index fields so their values
705+ # can be picked up from the grid builder's header parse.
706+ # First, extract each coordinate's byte offset and format from the segy_schema.
707+ coord_index_names = []
708+ coord_index_bytes = []
709+ coord_index_types = []
710+ for coord in segy_schema["trace"]["header_entries"]:
711+ if coord["name"] in seismic_coords:
712+ coord_index_names.append(coord["name"])
713+ coord_index_bytes.append(coord["byte_start"])
714+ coord_index_types.append(coord["format"])
715+
716+ for coord_name, coord_byte, coord_format in zip(coord_index_names, coord_index_bytes, coord_index_types, strict=True):
717+ index_fields.append(HeaderField(name=coord_name, byte=coord_byte, format=coord_format))
718+
704719 mdio_spec_grid = mdio_spec.customize(trace_header_fields=index_fields)
705720 segy_grid = SegyFile(url=segy_schema["path"], spec=mdio_spec_grid, settings=segy_settings)
706-
707721 dimensions, chunksize, index_headers = get_grid_plan(
708722 segy_file=segy_grid,
709723 return_headers=True,
710724 chunksize=chunksize,
711725 grid_overrides=grid_overrides,
712726 )
727+ # Override the "sample" dimension name
713728 dimensions[-1].name = get_sample_name(ds, dimensions)
714- # print(dimensions)
729+
730+ ds_coords = [dim for dim in dimensions if dim.name in seismic_coords]
731+ dimensions = [dim for dim in dimensions if dim.name not in seismic_coords]
732+
715733 grid = Grid(dims=dimensions)
716734 grid_density_qc(grid, num_traces)
717735 grid.build_map(index_headers)
718736
719- # Override the "sample" dimension name
737+ # print(f"dimensions: {dimensions}")
738+ # print(f"chunksize: {chunksize}")
739+ # print(f"index_headers: {index_headers}")
720740
721741 # Set dimension coordinates
722742 new_coords = {dim.name: dim.coords for dim in dimensions}
723743 ds = ds.assign_coords(new_coords)
724744 ds.to_mdio(store=mdio_path_or_buffer, mode="r+")
725745
726- # Set all coordinates which are not dimensions, root Variables, or live_mask
727-
728746 # Check grid validity by ensuring every trace's header-index is within dimension bounds
729747 valid_mask = np.ones(grid.num_traces, dtype=bool)
730748 for d_idx in range(len(grid.header_index_arrays)):
@@ -744,8 +762,6 @@ def segy_to_mdio_schematized(
744762 del valid_mask
745763 gc.collect()
746764
747- coords = ds.seismic.coords  # TODO: We also need to iterate over the coords and assign their values in parallel with the live_mask
748-
749765 # live_mask_array = ds.live_mask # TODO: Make this more robust
750766 live_mask_array = ds.trace_mask
751767 from mdio.core.v1._overloads import MDIODataArray
@@ -756,8 +772,6 @@ def segy_to_mdio_schematized(
756772
757773 chunker = ChunkIterator(live_mask_array, chunk_samples=True)
758774 for chunk_indices in chunker:
759- # print(f"chunk_indices: {chunk_indices}")
760- # chunk_indices is a tuple of N–1 slice objects
761775 trace_ids = grid.get_traces_for_chunk(chunk_indices)
762776 if trace_ids.size == 0:
763777 # Free memory immediately for empty chunks
@@ -782,7 +796,6 @@ def segy_to_mdio_schematized(
782796 if local_idx.dtype != np.intp:
783797 local_idx = local_idx.astype(np.intp)
784798 local_coords.append(local_idx)
785- # local_idx is now owned by local_coords list, safe to continue
786799
787800 # Free trace_ids as soon as we're done with it
788801 del trace_ids
@@ -794,13 +807,10 @@ def segy_to_mdio_schematized(
794807 del local_coords
795808
796809 # Write the entire block to Zarr at once
797- # live_mask_array.isel(chunk_indices).values[:] = block
798810 live_mask_array[chunk_indices] = block
799811
800812 # Free block immediately after writing
801813 del block
802-
803- # Force garbage collection periodically to free memory aggressively
804814 gc.collect()
805815
806816 # Save the entire dataset to persist the live_mask changes