@@ -10,6 +10,9 @@ const CHUNK_SIZE = 65536
1010const ADDRESS_SIZE = 10
1111
1212export default class Trix {
13+ private decoder = new TextDecoder ( 'utf8' )
14+ private indexCache ?: readonly ( readonly [ string , number ] ) [ ]
15+
1316 constructor (
1417 public ixxFile : GenericFilehandle ,
1518 public ixFile : GenericFilehandle ,
@@ -19,70 +22,61 @@ export default class Trix {
1922 async search ( searchString : string , opts ?: { signal ?: AbortSignal } ) {
2023 let resultArr = [ ] as [ string , string ] [ ]
2124 const searchWords = searchString . split ( ' ' )
25+ const firstWord = searchWords [ 0 ]
26+
27+ // validate that we have a non-empty search term
28+ if ( firstWord ) {
29+ const searchWord = firstWord . toLowerCase ( )
30+ const res = await this . getBuffer ( searchWord , opts )
31+
32+ if ( res ) {
33+ let { end, buffer } = res
34+ let done = false
35+ // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
36+ while ( ! done ) {
37+ const str = this . decoder . decode ( buffer )
38+
39+ // slice to lastIndexOf('\n') to make sure we get complete records
40+ // since the buffer fetch could get halfway into a record
41+ const lines = str
42+ . slice ( 0 , str . lastIndexOf ( '\n' ) )
43+ . split ( '\n' )
44+ . filter ( f => f )
45+
46+ const hits2 = [ ] as string [ ]
47+ for ( const line of lines ) {
48+ const word = line . split ( ' ' ) [ 0 ]
49+
50+ if ( word . startsWith ( searchWord ) ) {
51+ hits2 . push ( line )
52+ } else if ( word > searchWord ) {
53+ // we are done scanning if we are lexicographically greater than
54+ // the search string
55+ done = true
56+ }
57+ }
58+ const hits = hits2 . flatMap ( line => {
59+ const [ term , ...parts ] = line . split ( ' ' )
60+ return parts
61+ . filter ( elt => elt )
62+ . map ( elt => [ term , elt . split ( ',' ) [ 0 ] ] as [ string , string ] )
63+ } )
2264
23- // we only search one word at a time
24- const searchWord = searchWords [ 0 ] . toLowerCase ( )
25- const res = await this . _getBuffer ( searchWord , opts )
26- if ( ! res ) {
27- return [ ]
28- }
29-
30- let { end, buffer } = res
31- let done = false
32- const decoder = new TextDecoder ( 'utf8' )
33- // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
34- while ( ! done ) {
35- let foundSomething = false
36- const str = decoder . decode ( buffer )
37-
38- // slice to lastIndexOf('\n') to make sure we get complete records
39- // since the buffer fetch could get halfway into a record
40- const lines = str
41- . slice ( 0 , str . lastIndexOf ( '\n' ) )
42- . split ( '\n' )
43- . filter ( f => ! ! f )
44-
45- const hits2 = [ ] as string [ ]
46- for ( const line of lines ) {
47- const word = line . split ( ' ' ) [ 0 ]
48- const match = word . startsWith ( searchWord )
49- if ( ! foundSomething && match ) {
50- foundSomething = true
51- }
52-
53- // we are done scanning if we are lexicographically greater than the
54- // search string
55- if ( word . slice ( 0 , searchWord . length ) > searchWord ) {
56- done = true
57- }
58- if ( match ) {
59- hits2 . push ( line )
60- }
61- }
62- const hits = hits2 . flatMap ( line => {
63- const [ term , ...parts ] = line . split ( ' ' )
64- return parts . map ( elt => [ term , elt . split ( ',' ) [ 0 ] ] as [ string , string ] )
65- } )
66-
67- // if we are not done, and we haven't filled up maxResults with hits yet,
68- // then refetch
69- if ( resultArr . length + hits . length < this . maxResults && ! done ) {
70- const res2 = await this . ixFile . read ( CHUNK_SIZE , end , opts )
71-
72- // early break if empty response
73- if ( res2 . length === 0 ) {
7465 resultArr = resultArr . concat ( hits )
75- break
76- }
77- buffer = concatUint8Array ( [ buffer , res2 ] )
78- end += CHUNK_SIZE
79- }
8066
81- // if we have filled up the hits, or we are detected to be done via the
82- // filtering, then return
83- else if ( resultArr . length + hits . length >= this . maxResults || done ) {
84- resultArr = resultArr . concat ( hits )
85- break
67+ // if we are done or have filled up maxResults, break
68+ if ( done || resultArr . length >= this . maxResults ) {
69+ break
70+ }
71+
72+ // fetch more data
73+ const res2 = await this . ixFile . read ( CHUNK_SIZE , end , opts )
74+ if ( res2 . length === 0 ) {
75+ break
76+ }
77+ buffer = concatUint8Array ( [ buffer , res2 ] )
78+ end += CHUNK_SIZE
79+ }
8680 }
8781 }
8882
@@ -91,46 +85,40 @@ export default class Trix {
9185 }
9286
/**
 * Reads the `.ixx` file and parses it into `[prefix, position]` pairs,
 * memoising the parsed result on the instance so the file is only read and
 * parsed once per successful load.
 *
 * Each non-empty line is split so that its final ADDRESS_SIZE characters are
 * parsed as a hexadecimal number and everything before them is the prefix.
 *
 * @param opts - optional settings (e.g. an AbortSignal) spread into the
 *   `readFile` call
 * @returns the parsed index entries, in file order
 */
private async getIndex(opts?: { signal?: AbortSignal }) {
  const cached = this.indexCache
  if (cached) {
    return cached
  }

  const text = await this.ixxFile.readFile({ encoding: 'utf8', ...opts })

  const entries: (readonly [string, number])[] = []
  for (const line of text.split('\n')) {
    // skip blank lines (e.g. the one produced by a trailing newline)
    if (!line) {
      continue
    }
    const splitAt = line.length - ADDRESS_SIZE
    const prefix = line.slice(0, splitAt)
    const pos = Number.parseInt(line.slice(splitAt), 16)
    entries.push([prefix, pos] as const)
  }

  this.indexCache = entries
  return entries
}
109108
/**
 * Picks the read offset for `searchWord` from the parsed index and fetches
 * one chunk of the `.ix` file starting there.
 *
 * The offset is the position of the last index entry whose key, truncated to
 * the length of `searchWord`, compares strictly less than `searchWord`; it
 * stays at 0 when no entry qualifies.
 *
 * @param searchWord - the term being looked up (the caller lower-cases it)
 * @param opts - optional settings (e.g. an AbortSignal) forwarded to the
 *   index load and the file read
 * @returns the bytes read and `end`, the file position just past the
 *   requested range, which the caller uses as the offset of its next read
 */
private async getBuffer(searchWord: string, opts?: { signal?: AbortSignal }) {
  const index = await this.getIndex(opts)

  let offset = 0
  for (const [prefix, position] of index) {
    // compare only the leading characters so longer keys are ranked by the
    // portion that overlaps the search word
    if (prefix.slice(0, searchWord.length) < searchWord) {
      offset = position
    }
  }

  const end = offset + CHUNK_SIZE
  const buffer = await this.ixFile.read(end - offset, offset, opts)
  return { buffer, end }
}
136124}
0 commit comments