@@ -113,7 +113,6 @@ impl QueryMetadata {
113113 fn get_cigar_ops (
114114 & self ,
115115 paf_files : & [ String ] ,
116- paf_gzi_indices : & [ Option < bgzf:: gzi:: Index > ] ,
117116 ) -> Vec < CigarOp > {
118117 // Allocate space for cigar
119118 let mut cigar_buffer = vec ! [ 0 ; self . cigar_bytes] ;
@@ -124,15 +123,13 @@ impl QueryMetadata {
124123
125124 // Get reader and seek start of cigar str
126125 if [ ".gz" , ".bgz" ] . iter ( ) . any ( |e| paf_file. ends_with ( e) ) {
127- // Get the GZI index for the PAF file
128- let paf_gzi_index = paf_gzi_indices. get ( paf_file_index) . and_then ( Option :: as_ref) ;
129-
126+ // For compressed files, use virtual position directly
130127 let mut reader = bgzf:: io:: Reader :: new ( File :: open ( paf_file) . unwrap ( ) ) ;
131- reader
132- . seek_by_uncompressed_position ( paf_gzi_index. unwrap ( ) , self . cigar_offset ( ) )
133- . unwrap ( ) ;
128+ let virtual_position = bgzf:: VirtualPosition :: from ( self . cigar_offset ( ) ) ;
129+ reader. seek ( virtual_position) . unwrap ( ) ;
134130 reader. read_exact ( & mut cigar_buffer) . unwrap ( ) ;
135131 } else {
132+ // For uncompressed files, use byte offset
136133 let mut reader = File :: open ( paf_file) . unwrap ( ) ;
137134 reader. seek ( SeekFrom :: Start ( self . cigar_offset ( ) ) ) . unwrap ( ) ;
138135 reader. read_exact ( & mut cigar_buffer) . unwrap ( ) ;
@@ -285,36 +282,21 @@ impl SortedRanges {
285282pub struct Impg {
286283 pub trees : RwLock < TreeMap > ,
287284 pub seq_index : SequenceIndex ,
288- paf_files : Vec < String > , // List of all PAF files
289- paf_gzi_indices : Vec < Option < bgzf:: gzi:: Index > > , // Corresponding GZI indices
290- pub forest_map : ForestMap , // Forest map for lazy loading
291- index_file_path : String , // Path to the index file for lazy loading
285+ paf_files : Vec < String > , // List of all PAF files
286+ pub forest_map : ForestMap , // Forest map for lazy loading
287+ index_file_path : String , // Path to the index file for lazy loading
292288}
293289
294290impl Impg {
295291 pub fn from_multi_paf_records (
296292 records_by_file : & [ ( Vec < PartialPafRecord > , String ) ] ,
297293 seq_index : SequenceIndex ,
298294 ) -> Result < Self , ParseErr > {
299- // Use par_iter to process the files in parallel and collect both pieces of information
300- let ( paf_files, paf_gzi_indices) : ( Vec < String > , Vec < Option < bgzf:: gzi:: Index > > ) =
301- records_by_file
302- . par_iter ( )
303- . map ( |( _, paf_file) | {
304- let paf_gzi_index = if [ ".gz" , ".bgz" ] . iter ( ) . any ( |e| paf_file. ends_with ( e) ) {
305- let paf_gzi_file = paf_file. to_owned ( ) + ".gzi" ;
306- Some (
307- bgzf:: gzi:: fs:: read ( paf_gzi_file. clone ( ) )
308- . unwrap_or_else ( |_| panic ! ( "Could not open {paf_gzi_file}" ) ) ,
309- )
310- } else {
311- None
312- } ;
313-
314- // Return both values as a tuple
315- ( paf_file. clone ( ) , paf_gzi_index)
316- } )
317- . unzip ( ) ; // Separate the tuples into two vectors
295+ // Extract just the PAF file paths
296+ let paf_files: Vec < String > = records_by_file
297+ . par_iter ( )
298+ . map ( |( _, paf_file) | paf_file. clone ( ) )
299+ . collect ( ) ;
318300
319301 let intervals: FxHashMap < u32 , Vec < Interval < QueryMetadata > > > = records_by_file
320302 . par_iter ( )
@@ -373,7 +355,6 @@ impl Impg {
373355 trees : RwLock :: new ( trees) ,
374356 seq_index,
375357 paf_files,
376- paf_gzi_indices,
377358 forest_map : ForestMap :: new ( ) , // All trees are in memory, no need for forest map
378359 index_file_path : String :: new ( ) , // All trees are in memory, no need for index file path
379360 } )
@@ -556,27 +537,10 @@ impl Impg {
556537 )
557538 } ) ?;
558539
559- // Determine PAF GZI indices
560- let paf_gzi_indices = paf_files
561- . iter ( )
562- . map ( |paf_file| {
563- if [ ".gz" , ".bgz" ] . iter ( ) . any ( |e| paf_file. ends_with ( e) ) {
564- let paf_gzi_file = format ! ( "{paf_file}.gzi" ) ;
565- Some (
566- bgzf:: gzi:: fs:: read ( paf_gzi_file. clone ( ) )
567- . unwrap_or_else ( |_| panic ! ( "Could not open {paf_gzi_file}" ) ) ,
568- )
569- } else {
570- None
571- }
572- } )
573- . collect :: < Vec < _ > > ( ) ;
574-
575540 Ok ( Self {
576541 trees : RwLock :: new ( FxHashMap :: default ( ) ) , // Start with empty trees - load on demand
577542 seq_index,
578543 paf_files : paf_files. to_vec ( ) ,
579- paf_gzi_indices,
580544 forest_map,
581545 index_file_path,
582546 } )
@@ -631,7 +595,7 @@ impl Impg {
631595 metadata. query_end ,
632596 metadata. strand ( ) ,
633597 ) ,
634- & metadata. get_cigar_ops ( & self . paf_files , self . paf_gzi_indices . as_ref ( ) ) ,
598+ & metadata. get_cigar_ops ( & self . paf_files ) ,
635599 ) ;
636600 if let Some ( (
637601 adjusted_query_start,
@@ -777,7 +741,7 @@ impl Impg {
777741 metadata. query_end ,
778742 metadata. strand ( ) ,
779743 ) ,
780- & metadata. get_cigar_ops ( & self . paf_files , self . paf_gzi_indices . as_ref ( ) ) ,
744+ & metadata. get_cigar_ops ( & self . paf_files ) ,
781745 ) ;
782746
783747 if let Some ( (
@@ -1010,10 +974,7 @@ impl Impg {
1010974 metadata. query_end ,
1011975 metadata. strand ( ) ,
1012976 ) ,
1013- & metadata. get_cigar_ops (
1014- & self . paf_files ,
1015- self . paf_gzi_indices . as_ref ( ) ,
1016- ) ,
977+ & metadata. get_cigar_ops ( & self . paf_files ) ,
1017978 ) ;
1018979
1019980 if let Some ( (
0 commit comments