@@ -35,72 +35,84 @@ export default class Trix {
3535 }
3636
3737 async search ( searchString : string , opts ?: { signal ?: AbortSignal } ) {
38- let resultArr = [ ] as [ string , string ] [ ]
39- const searchWords = searchString . split ( ' ' )
38+ const searchWords = searchString . split ( / \s + / )
4039 const firstWord = searchWords [ 0 ]
4140
4241 // validate that we have a non-empty search term
43- if ( firstWord ) {
44- const searchWord = firstWord . toLowerCase ( )
45- const res = await this . getBuffer ( searchWord , opts )
46-
47- let { end, buffer } = res
48- const { fileSize } = res
49- let done = false
50- // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
51- while ( ! done ) {
52- const str = this . decoder . decode ( buffer )
53-
54- // slice to lastIndexOf('\n') to make sure we get complete records
55- // since the buffer fetch could get halfway into a record
42+ if ( ! firstWord ) {
43+ return [ ]
44+ }
45+
46+ const searchWord = firstWord . toLowerCase ( )
47+ const res = await this . getBuffer ( searchWord , opts )
48+
49+ let { end, buffer } = res
50+ const { fileSize } = res
51+ let resultArr = [ ] as [ string , string ] [ ]
52+ let done = false
53+
54+ // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
55+ while ( ! done ) {
56+ const str = this . decoder . decode ( buffer , { stream : true } )
57+
58+ // slice to lastIndexOf('\n') to make sure we get complete records
59+ // since the buffer fetch could get halfway into a record
60+ const lastNewline = str . lastIndexOf ( '\n' )
61+ if ( lastNewline === - 1 ) {
62+ // if no newline, we need more data unless we're at EOF
63+ if ( fileSize !== undefined && end >= fileSize ) {
64+ done = true
65+ }
66+ } else {
5667 const lines = str
57- . slice ( 0 , str . lastIndexOf ( '\n' ) )
68+ . slice ( 0 , lastNewline )
5869 . split ( '\n' )
5970 . filter ( Boolean )
6071
61- const hits2 = [ ] as string [ ]
6272 for ( const line of lines ) {
6373 const word = line . split ( ' ' ) [ 0 ]
6474
6575 if ( word . startsWith ( searchWord ) ) {
66- hits2 . push ( line )
76+ const [ term , ...parts ] = line . split ( ' ' )
77+ const hits = parts
78+ . filter ( Boolean )
79+ . map ( elt => [ term , elt . split ( ',' ) [ 0 ] ] as [ string , string ] )
80+ resultArr = resultArr . concat ( hits )
6781 } else if ( word > searchWord ) {
6882 // we are done scanning if we are lexicographically greater than
6983 // the search string
7084 done = true
85+ break
7186 }
7287 }
73- const hits = hits2 . flatMap ( line => {
74- const [ term , ...parts ] = line . split ( ' ' )
75- return parts
76- . filter ( Boolean )
77- . map ( elt => [ term , elt . split ( ',' ) [ 0 ] ] as [ string , string ] )
78- } )
79-
80- resultArr = resultArr . concat ( hits )
81-
82- // if we are done or have filled up maxResults, break
83- if ( done || resultArr . length >= this . maxResults ) {
84- break
85- }
88+ }
8689
87- // avoid reading past end of file
88- if ( fileSize !== undefined && end >= fileSize ) {
89- break
90- }
90+ // if we are done or have filled up maxResults, break
91+ if ( done || resultArr . length >= this . maxResults ) {
92+ break
93+ }
9194
92- // fetch more data, clamping to file size if known
93- let bytesToRead = CHUNK_SIZE
94- if ( fileSize !== undefined ) {
95- bytesToRead = Math . min ( CHUNK_SIZE , fileSize - end )
96- }
97- const res2 = await this . ixFile . read ( bytesToRead , end , opts )
98- if ( res2 . length === 0 ) {
99- break
100- }
101- buffer = concatUint8Array ( [ buffer , res2 ] )
102- end += res2 . length
95+ // avoid reading past end of file
96+ if ( fileSize !== undefined && end >= fileSize ) {
97+ break
98+ }
99+
100+ // fetch more data, clamping to file size if known
101+ let bytesToRead = CHUNK_SIZE
102+ if ( fileSize !== undefined ) {
103+ bytesToRead = Math . min ( CHUNK_SIZE , fileSize - end )
104+ }
105+
106+ if ( bytesToRead <= 0 ) {
107+ break
108+ }
109+
110+ const res2 = await this . ixFile . read ( bytesToRead , end , opts )
111+ if ( res2 . length === 0 ) {
112+ break
103113 }
114+ buffer = concatUint8Array ( [ buffer , res2 ] )
115+ end += res2 . length
104116 }
105117
106118 // de-duplicate results based on the detail column (resultArr[1])
@@ -130,23 +142,46 @@ export default class Trix {
130142 }
131143
/**
 * Reads the first window of the .ix file in which records for `searchWord`
 * can start.
 *
 * The secondary index returned by `getIndex` is treated as a list of
 * `[key, byteOffset]` pairs sorted ascending by key (the binary search below
 * relies on that ordering — NOTE(review): confirm `getIndex` guarantees it).
 *
 * @param searchWord - lower-cased first word of the query
 * @param opts - optional abort signal, forwarded to every read
 * @returns the bytes read, the absolute offset just past them (`end`), and
 *   the .ix file size if it is known
 */
private async getBuffer(searchWord: string, opts?: { signal?: AbortSignal }) {
  const indexes = await this.getIndex(opts)
  // Fetch the file size once; it is needed for clamping and is returned to
  // the caller.
  const fileSize = await this.getIxFileSize(opts)

  // Binary search for the last index entry whose key sorts strictly before
  // searchWord. The comparison is strict on purpose: when a key is equal to
  // searchWord (or several entries share that key), records matching the
  // search may already begin in the block before it, so we must start one
  // entry earlier rather than at the equal key.
  let low = 0
  let high = indexes.length - 1
  let bestIndex = -1
  while (low <= high) {
    const mid = Math.floor((low + high) / 2)
    if (indexes[mid][0] < searchWord) {
      bestIndex = mid
      low = mid + 1
    } else {
      high = mid - 1
    }
  }

  // With no entry before searchWord, scan from the start of the file.
  let start = 0
  let end = CHUNK_SIZE
  if (bestIndex !== -1) {
    start = indexes[bestIndex][1]
    // Read up to the start of the next index entry so the whole block is
    // covered; for the last entry read to EOF, or one chunk if the file
    // size is unknown.
    end =
      bestIndex + 1 < indexes.length
        ? indexes[bestIndex + 1][1]
        : (fileSize ?? start + CHUNK_SIZE)
  }

  // Always request at least one CHUNK_SIZE so closely spaced index entries
  // do not produce tiny reads...
  end = Math.max(end, start + CHUNK_SIZE)
  // ...but never ask for bytes past the end of the file.
  if (fileSize !== undefined) {
    end = Math.min(end, fileSize)
  }

  const buffer = await this.ixFile.read(end - start, start, opts)
  return { buffer, end, fileSize }
}
152187}
0 commit comments