@@ -3,12 +3,17 @@ import { unzip, unzipChunkSlice } from '@gmod/bgzf-filehandle'
33import LRU from '@jbrowse/quick-lru'
44import { LocalFile , RemoteFile } from 'generic-filehandle2'
55
6- import Chunk from './chunk.ts'
76import CSI from './csi.ts'
8- import IndexFile , { Options } from './indexFile.ts'
7+ import IndexFile from './indexFile.ts'
98import TBI from './tbi.ts'
109
10+ import type Chunk from './chunk.ts'
1111import type { GenericFilehandle } from 'generic-filehandle2'
12+ import type { Options } from './indexFile.ts'
13+
14+ const TAB = 9
15+ const NEWLINE = 10
16+ const SEMICOLON = 59
1217
1318type GetLinesCallback = (
1419 line : string ,
@@ -29,9 +34,6 @@ interface ReadChunk {
2934 dpositions : number [ ]
3035}
3136
32- const TAB = 9
33- const NEWLINE = 10
34-
3537export default class TabixIndexedFile {
3638 private filehandle : GenericFilehandle
3739 private index : IndexFile
@@ -418,59 +420,51 @@ export default class TabixIndexedFile {
418420 const refLen = refEnd - refStart
419421 let endCoordinate = startCoordinate + refLen
420422
421- // Check for SVTYPE=TRA - look for 'S' (83) then verify
422- const S = 83
423- let pos = infoStart
424- while ( pos <= infoEnd - 10 ) {
425- const idx = buffer . indexOf ( S , pos )
426- if ( idx === - 1 || idx > infoEnd - 10 ) {
427- break
428- }
429- if (
430- buffer [ idx + 1 ] === 86 && // V
431- buffer [ idx + 2 ] === 84 && // T
432- buffer [ idx + 3 ] === 89 && // Y
433- buffer [ idx + 4 ] === 80 && // P
434- buffer [ idx + 5 ] === 69 && // E
435- buffer [ idx + 6 ] === 61 && // =
436- buffer [ idx + 7 ] === 84 && // T
437- buffer [ idx + 8 ] === 82 && // R
438- buffer [ idx + 9 ] === 65 // A
439- ) {
440- return startCoordinate + 1
441- }
442- pos = idx + 1
423+ if ( buffer [ infoStart ] === 46 ) {
424+ // INFO is '.', no fields to check
425+ return endCoordinate
443426 }
444427
445- // Check for END=
446- if ( buffer [ infoStart ] !== 46 ) {
447- // not '.'
448- const E = 69
449- const SEMICOLON = 59
450- pos = infoStart
451- while ( pos <= infoEnd - 4 ) {
452- const idx = buffer . indexOf ( E , pos )
453- if ( idx === - 1 || idx > infoEnd - 4 ) {
454- break
428+ // Single pass: walk semicolon-delimited fields checking prefixes.
429+ // This avoids repeated indexOf scans for common bytes like 'S' and 'E'
430+ // that produce many false positives in typical INFO fields.
431+ let fieldStart = infoStart
432+ for ( let i = infoStart ; i <= infoEnd ; i ++ ) {
433+ if ( i === infoEnd || buffer [ i ] === SEMICOLON ) {
434+ const fieldLen = i - fieldStart
435+ if (
436+ fieldLen >= 10 &&
437+ buffer [ fieldStart ] === 83 && // S
438+ buffer [ fieldStart + 1 ] === 86 && // V
439+ buffer [ fieldStart + 2 ] === 84 && // T
440+ buffer [ fieldStart + 3 ] === 89 && // Y
441+ buffer [ fieldStart + 4 ] === 80 && // P
442+ buffer [ fieldStart + 5 ] === 69 && // E
443+ buffer [ fieldStart + 6 ] === 61 && // =
444+ buffer [ fieldStart + 7 ] === 84 && // T
445+ buffer [ fieldStart + 8 ] === 82 && // R
446+ buffer [ fieldStart + 9 ] === 65 // A
447+ ) {
448+ return startCoordinate + 1
455449 }
456450 if (
457- ( idx === infoStart || buffer [ idx - 1 ] === SEMICOLON ) &&
458- buffer [ idx + 1 ] === 78 && // N
459- buffer [ idx + 2 ] === 68 && // D
460- buffer [ idx + 3 ] === 61 // =
451+ fieldLen >= 4 &&
452+ buffer [ fieldStart ] === 69 && // E
453+ buffer [ fieldStart + 1 ] === 78 && // N
454+ buffer [ fieldStart + 2 ] === 68 && // D
455+ buffer [ fieldStart + 3 ] === 61 // =
461456 ) {
462457 endCoordinate = 0
463- for ( let k = idx + 4 ; k < infoEnd ; k ++ ) {
458+ for ( let k = fieldStart + 4 ; k < i ; k ++ ) {
464459 const c = buffer [ k ] !
465460 if ( c >= 48 && c <= 57 ) {
466461 endCoordinate = endCoordinate * 10 + ( c - 48 )
467- } else if ( c === SEMICOLON ) {
462+ } else {
468463 break
469464 }
470465 }
471- break
472466 }
473- pos = idx + 1
467+ fieldStart = i + 1
474468 }
475469 }
476470 return endCoordinate
0 commit comments