Skip to content

Commit 3ec3085

Browse files
authored
Small code simplifications and add tests (#22)
1 parent 9d9834e commit 3ec3085

File tree

11 files changed

+841
-923
lines changed

11 files changed

+841
-923
lines changed

jest.config.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
module.exports = {
33
preset: 'ts-jest',
44
testEnvironment: 'node',
5-
};
5+
}

package.json

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,24 +20,25 @@
2020
"preversion": "yarn lint && yarn test --run && yarn build",
2121
"postbuild:es5": "echo '{\"type\": \"commonjs\"}' > dist/package.json",
2222
"postversion": "git push --follow-tags",
23-
"test": "vitest"
23+
"test": "vitest",
24+
"format": "prettier --write ."
2425
},
2526
"name": "@gmod/trix",
2627
"author": "Matt Morgan",
2728
"repository": "GMOD/trix-js",
2829
"devDependencies": {
29-
"@types/node": "^22.15.21",
30-
"@typescript-eslint/eslint-plugin": "^8.4.0",
31-
"@typescript-eslint/parser": "^8.4.0",
30+
"@types/node": "^25.0.9",
31+
"@typescript-eslint/eslint-plugin": "^8.53.0",
32+
"@typescript-eslint/parser": "^8.53.0",
3233
"eslint": "^9.0.0",
3334
"eslint-plugin-import": "^2.31.0",
3435
"eslint-plugin-unicorn": "^62.0.0",
35-
"generic-filehandle2": "^2.0.5",
36-
"prettier": "^3.3.3",
36+
"generic-filehandle2": "^2.0.18",
37+
"prettier": "^3.8.0",
3738
"rimraf": "^6.0.1",
3839
"typescript": "^5.7.0",
39-
"typescript-eslint": "^8.4.0",
40-
"vitest": "^4.0.6"
40+
"typescript-eslint": "^8.53.0",
41+
"vitest": "^4.0.17"
4142
},
4243
"publishConfig": {
4344
"access": "public"

src/index.ts

Lines changed: 67 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ const CHUNK_SIZE = 65536
1010
const ADDRESS_SIZE = 10
1111

1212
export default class Trix {
13+
private decoder = new TextDecoder('utf8')
14+
private indexCache?: readonly (readonly [string, number])[]
15+
1316
constructor(
1417
public ixxFile: GenericFilehandle,
1518
public ixFile: GenericFilehandle,
@@ -19,70 +22,61 @@ export default class Trix {
1922
async search(searchString: string, opts?: { signal?: AbortSignal }) {
2023
let resultArr = [] as [string, string][]
2124
const searchWords = searchString.split(' ')
25+
const firstWord = searchWords[0]
26+
27+
// validate that we have a non-empty search term
28+
if (firstWord) {
29+
const searchWord = firstWord.toLowerCase()
30+
const res = await this.getBuffer(searchWord, opts)
31+
32+
if (res) {
33+
let { end, buffer } = res
34+
let done = false
35+
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
36+
while (!done) {
37+
const str = this.decoder.decode(buffer)
38+
39+
// slice to lastIndexOf('\n') to make sure we get complete records
40+
// since the buffer fetch could get halfway into a record
41+
const lines = str
42+
.slice(0, str.lastIndexOf('\n'))
43+
.split('\n')
44+
.filter(f => f)
45+
46+
const hits2 = [] as string[]
47+
for (const line of lines) {
48+
const word = line.split(' ')[0]
49+
50+
if (word.startsWith(searchWord)) {
51+
hits2.push(line)
52+
} else if (word > searchWord) {
53+
// we are done scanning if we are lexicographically greater than
54+
// the search string
55+
done = true
56+
}
57+
}
58+
const hits = hits2.flatMap(line => {
59+
const [term, ...parts] = line.split(' ')
60+
return parts
61+
.filter(elt => elt)
62+
.map(elt => [term, elt.split(',')[0]] as [string, string])
63+
})
2264

23-
// we only search one word at a time
24-
const searchWord = searchWords[0].toLowerCase()
25-
const res = await this._getBuffer(searchWord, opts)
26-
if (!res) {
27-
return []
28-
}
29-
30-
let { end, buffer } = res
31-
let done = false
32-
const decoder = new TextDecoder('utf8')
33-
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
34-
while (!done) {
35-
let foundSomething = false
36-
const str = decoder.decode(buffer)
37-
38-
// slice to lastIndexOf('\n') to make sure we get complete records
39-
// since the buffer fetch could get halfway into a record
40-
const lines = str
41-
.slice(0, str.lastIndexOf('\n'))
42-
.split('\n')
43-
.filter(f => !!f)
44-
45-
const hits2 = [] as string[]
46-
for (const line of lines) {
47-
const word = line.split(' ')[0]
48-
const match = word.startsWith(searchWord)
49-
if (!foundSomething && match) {
50-
foundSomething = true
51-
}
52-
53-
// we are done scanning if we are lexicographically greater than the
54-
// search string
55-
if (word.slice(0, searchWord.length) > searchWord) {
56-
done = true
57-
}
58-
if (match) {
59-
hits2.push(line)
60-
}
61-
}
62-
const hits = hits2.flatMap(line => {
63-
const [term, ...parts] = line.split(' ')
64-
return parts.map(elt => [term, elt.split(',')[0]] as [string, string])
65-
})
66-
67-
// if we are not done, and we haven't filled up maxResults with hits yet,
68-
// then refetch
69-
if (resultArr.length + hits.length < this.maxResults && !done) {
70-
const res2 = await this.ixFile.read(CHUNK_SIZE, end, opts)
71-
72-
// early break if empty response
73-
if (res2.length === 0) {
7465
resultArr = resultArr.concat(hits)
75-
break
76-
}
77-
buffer = concatUint8Array([buffer, res2])
78-
end += CHUNK_SIZE
79-
}
8066

81-
// if we have filled up the hits, or we are detected to be done via the
82-
// filtering, then return
83-
else if (resultArr.length + hits.length >= this.maxResults || done) {
84-
resultArr = resultArr.concat(hits)
85-
break
67+
// if we are done or have filled up maxResults, break
68+
if (done || resultArr.length >= this.maxResults) {
69+
break
70+
}
71+
72+
// fetch more data
73+
const res2 = await this.ixFile.read(CHUNK_SIZE, end, opts)
74+
if (res2.length === 0) {
75+
break
76+
}
77+
buffer = concatUint8Array([buffer, res2])
78+
end += CHUNK_SIZE
79+
}
8680
}
8781
}
8882

@@ -91,46 +85,40 @@ export default class Trix {
9185
}
9286

9387
private async getIndex(opts?: { signal?: AbortSignal }) {
88+
if (this.indexCache) {
89+
return this.indexCache
90+
}
9491
const file = await this.ixxFile.readFile({
9592
encoding: 'utf8',
9693
...opts,
9794
})
98-
return file
95+
const result = file
9996
.split('\n')
100-
.filter(f => !!f)
97+
.filter(f => f)
10198
.map(line => {
10299
const p = line.length - ADDRESS_SIZE
103100
const prefix = line.slice(0, p)
104101
const posStr = line.slice(p)
105102
const pos = Number.parseInt(posStr, 16)
106103
return [prefix, pos] as const
107104
})
105+
this.indexCache = result
106+
return result
108107
}
109108

110-
private async _getBuffer(
111-
searchWord: string,
112-
opts?: { signal?: AbortSignal },
113-
) {
109+
private async getBuffer(searchWord: string, opts?: { signal?: AbortSignal }) {
114110
let start = 0
115-
let end = 65536
111+
let end = CHUNK_SIZE
116112
const indexes = await this.getIndex(opts)
117113
for (const [key, value] of indexes) {
118114
const trimmedKey = key.slice(0, searchWord.length)
119115
if (trimmedKey < searchWord) {
120116
start = value
121-
end = value + 65536
117+
end = value + CHUNK_SIZE
122118
}
123119
}
124120

125-
// Return the buffer and its end position in the file.
126-
const len = end - start
127-
if (len < 0) {
128-
return undefined
129-
}
130-
const buffer = await this.ixFile.read(len, start, opts)
131-
return {
132-
buffer,
133-
end,
134-
}
121+
const buffer = await this.ixFile.read(end - start, start, opts)
122+
return { buffer, end }
135123
}
136124
}

src/util.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
export function sum(array: Uint8Array[]) {
2-
let sum = 0
2+
let total = 0
33
for (const entry of array) {
4-
sum += entry.length
4+
total += entry.length
55
}
6-
return sum
6+
return total
77
}
88
export function concatUint8Array(args: Uint8Array[]) {
99
const mergedArray = new Uint8Array(sum(args))
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
2+
3+
exports[`Edge case handling > handles search term with trailing spaces 1`] = `
4+
[
5+
[
6+
"this",
7+
"id1",
8+
],
9+
[
10+
"this",
11+
"id2",
12+
],
13+
[
14+
"this",
15+
"id3",
16+
],
17+
]
18+
`;

test/dedupe.test.ts

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import { describe, expect, it } from 'vitest'
2+
3+
import { dedupe } from '../src/dedupe.ts'
4+
5+
describe('dedupe', () => {
6+
it('returns empty array for empty input', () => {
7+
expect(dedupe([])).toEqual([])
8+
})
9+
10+
it('removes duplicate primitives', () => {
11+
expect(dedupe([1, 2, 2, 3, 3, 3])).toEqual([1, 2, 3])
12+
})
13+
14+
it('removes duplicate strings', () => {
15+
expect(dedupe(['a', 'b', 'a', 'c', 'b'])).toEqual(['a', 'b', 'c'])
16+
})
17+
18+
it('removes duplicate objects using default hasher', () => {
19+
const input = [{ x: 1 }, { x: 2 }, { x: 1 }, { x: 3 }]
20+
expect(dedupe(input)).toEqual([{ x: 1 }, { x: 2 }, { x: 3 }])
21+
})
22+
23+
it('uses custom hasher function', () => {
24+
const input = [
25+
{ id: 1, name: 'a' },
26+
{ id: 2, name: 'b' },
27+
{ id: 1, name: 'c' },
28+
]
29+
const result = dedupe(input, item => String(item.id))
30+
expect(result).toEqual([
31+
{ id: 1, name: 'a' },
32+
{ id: 2, name: 'b' },
33+
])
34+
})
35+
36+
it('preserves order of first occurrence', () => {
37+
expect(dedupe([3, 1, 2, 1, 3, 2])).toEqual([3, 1, 2])
38+
})
39+
40+
it('handles arrays with tuple values', () => {
41+
const input: [string, string][] = [
42+
['term1', 'value1'],
43+
['term2', 'value2'],
44+
['term1', 'value1'],
45+
['term3', 'value1'],
46+
]
47+
const result = dedupe(input, elt => elt[1])
48+
expect(result).toEqual([
49+
['term1', 'value1'],
50+
['term2', 'value2'],
51+
])
52+
})
53+
})

0 commit comments

Comments
 (0)