Skip to content

Commit ddc7ddc

Browse files
Improve check performance (#763)
* Better error logs * Upgrade to generic-filehandle2 * Add cache for seq adapters * Add cache for refSeq documents * Use toSorted instead of cloning and then sorting * Fix wrong file name passed to handle * Get sequence up front if possible when checking
1 parent 868f11a commit ddc7ddc

File tree

14 files changed

+152
-1380
lines changed

14 files changed

+152
-1380
lines changed

packages/apollo-cli/README.md

Lines changed: 0 additions & 1255 deletions
Large diffs are not rendered by default.

packages/apollo-collaboration-server/package.json

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
"@emotion/react": "^11.10.6",
4646
"@emotion/styled": "^11.10.6",
4747
"@gmod/gff": "^2.0.0",
48-
"@gmod/indexedfasta": "^2.0.4",
48+
"@gmod/indexedfasta": "^5.0.2",
4949
"@jbrowse/core": "^4.1.1",
5050
"@jbrowse/mobx-state-tree": "^5.5.0",
5151
"@mui/material": "^7.3.7",
@@ -64,7 +64,7 @@
6464
"deepmerge": "^4.3.1",
6565
"express": "^4.18.0",
6666
"express-session": "^1.17.3",
67-
"generic-filehandle": "^3.0.0",
67+
"generic-filehandle2": "^2.0.18",
6868
"https-proxy-agent": "^7.0.6",
6969
"joi": "^17.7.0",
7070
"material-ui-popup-state": "^5.0.4",
@@ -79,6 +79,7 @@
7979
"passport-local": "^1.0.0",
8080
"passport-microsoft": "^1.0.0",
8181
"prop-types": "^15.8.1",
82+
"quick-lru": "^7.3.0",
8283
"react": "^18.2.0",
8384
"react-dom": "^18.2.0",
8485
"reflect-metadata": "^0.1.13",

packages/apollo-collaboration-server/src/changes/changes.service.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,9 @@ export class ChangesService {
237237
await this.refSeqChunkModel
238238
.deleteMany({ $and: [{ status: -1, user: uniqUserId }] })
239239
.exec()
240+
if (error instanceof Error) {
241+
throw error
242+
}
240243
throw new UnprocessableEntityException(String(error))
241244
}
242245
})

packages/apollo-collaboration-server/src/checks/checks.service.ts

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -100,12 +100,26 @@ export class ChecksService {
100100
if (!c) {
101101
throw new Error(`Check "${check.name}" not registered`)
102102
}
103-
const result = await c.checkFeature(
104-
flatDoc,
105-
(start: number, end: number) => {
103+
// If the feature is small enough, just fetch its sequence once and
104+
// substring that to avoid slow disk reads
105+
let featureSequence: string | undefined
106+
if (doc.max - doc.min <= 1_000_000) {
107+
featureSequence = await this.getSequence({
108+
start: doc.min,
109+
end: doc.max,
110+
featureDoc: doc,
111+
})
112+
}
113+
// eslint-disable-next-line unicorn/consistent-function-scoping
114+
const getSequence = (start: number, end: number) => {
115+
if (!featureSequence || start < doc.min || end > doc.max) {
106116
return this.getSequence({ start, end, featureDoc: doc })
107-
},
108-
)
117+
}
118+
return Promise.resolve(
119+
featureSequence.slice(start - doc.min, end - doc.min),
120+
)
121+
}
122+
const result = await c.checkFeature(flatDoc, getSequence)
109123
if (result.length > 0) {
110124
await this.checkResultModel.insertMany(result)
111125
}

packages/apollo-collaboration-server/src/files/files.service.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import {
1414
} from '@nestjs/common'
1515
import { ConfigService } from '@nestjs/config'
1616
import { InjectModel } from '@nestjs/mongoose'
17-
import { type GenericFilehandle, LocalFile } from 'generic-filehandle'
17+
import { type GenericFilehandle, LocalFile } from 'generic-filehandle2'
1818
import { Model } from 'mongoose'
1919

2020
import { CreateFileDto } from './dto/create-file.dto.js'

packages/apollo-collaboration-server/src/files/filesUtil.ts

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ import type {
2020
FilehandleOptions,
2121
GenericFilehandle,
2222
Stats,
23-
} from 'generic-filehandle'
23+
} from 'generic-filehandle2'
2424

2525
interface FileUpload {
2626
originalname: string
@@ -116,20 +116,11 @@ export class LocalFileGzip implements GenericFilehandle {
116116
this.contents = fhPromise.then((fh) => unzip(fh))
117117
}
118118

119-
public async read(
120-
buffer: Buffer,
121-
offset = 0,
122-
length: number,
123-
position = 0,
124-
): Promise<{ bytesRead: number; buffer: Buffer }> {
119+
public async read(length: number, position = 0): Promise<Buffer> {
120+
const buffer = Buffer.alloc(length)
125121
const unzippedContents = await this.contents
126-
const bytesRead = unzippedContents.copy(
127-
buffer,
128-
offset,
129-
position,
130-
position + length,
131-
)
132-
return { bytesRead, buffer }
122+
unzippedContents.copy(buffer, 0, position, position + length)
123+
return buffer
133124
}
134125

135126
public async readFile(

packages/apollo-collaboration-server/src/global-exceptions.filter.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,14 @@ export class GlobalExceptionsFilter extends BaseExceptionFilter {
77

88
catch(exception: unknown, host: ArgumentsHost) {
99
this.logger.error(exception)
10+
if (exception instanceof Error) {
11+
if (exception.cause) {
12+
this.logger.error(exception.cause)
13+
}
14+
if (exception.stack) {
15+
this.logger.error(exception.stack)
16+
}
17+
}
1018
super.catch(exception, host)
1119
}
1220
}

packages/apollo-collaboration-server/src/sequence/sequence.service.ts

Lines changed: 83 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,36 @@ import {
1111
import { BgzipIndexedFasta, IndexedFasta } from '@gmod/indexedfasta'
1212
import { Injectable, Logger } from '@nestjs/common'
1313
import { InjectModel } from '@nestjs/mongoose'
14-
import { RemoteFile } from 'generic-filehandle'
14+
import { type GenericFilehandle, RemoteFile } from 'generic-filehandle2'
1515
import { Model } from 'mongoose'
16+
import QuickLRU from 'quick-lru'
1617

1718
import { AssembliesService } from '../assemblies/assemblies.service.js'
1819
import { FilesService } from '../files/files.service.js'
1920

2021
import { GetSequenceDto } from './dto/get-sequence.dto.js'
2122

23+
interface AdapterCache {
24+
adapter: IndexedFasta | BgzipIndexedFasta
25+
fileHandles: GenericFilehandle[]
26+
}
27+
28+
const adapterLRU = new QuickLRU<string, AdapterCache>({
29+
maxSize: 100,
30+
maxAge: 24 * 60 * 60 * 1000,
31+
onEviction(key, adapter) {
32+
const { fileHandles } = adapter
33+
for (const fileHandle of fileHandles) {
34+
void fileHandle.close()
35+
}
36+
},
37+
})
38+
39+
const refSeqDocLRU = new QuickLRU<string, RefSeqDocument>({
40+
maxSize: 100,
41+
maxAge: 24 * 60 * 60 * 1000,
42+
})
43+
2244
@Injectable()
2345
export class SequenceService {
2446
constructor(
@@ -35,9 +57,15 @@ export class SequenceService {
3557
private readonly logger = new Logger(SequenceService.name)
3658

3759
async getSequence({ end, refSeq: refSeqId, start }: GetSequenceDto) {
38-
const refSeq = await this.refSeqModel.findById(refSeqId)
60+
let refSeq: RefSeqDocument | null | undefined = refSeqDocLRU.get(
61+
String(refSeqId),
62+
)
3963
if (!refSeq) {
40-
throw new Error(`RefSeq "${refSeqId}" not found`)
64+
refSeq = await this.refSeqModel.findById(refSeqId)
65+
if (!refSeq) {
66+
throw new Error(`RefSeq "${refSeqId}" not found`)
67+
}
68+
refSeqDocLRU.set(String(refSeqId), refSeq)
4169
}
4270

4371
const { assembly, chunkSize, name } = refSeq
@@ -47,17 +75,34 @@ export class SequenceService {
4775

4876
if (assemblyDoc.externalLocation) {
4977
const { fa, fai, gzi } = assemblyDoc.externalLocation
78+
const adapterCacheEntry = adapterLRU.get(fa)
79+
let sequenceAdapter = adapterCacheEntry?.adapter
5080

51-
const sequenceAdapter = gzi
52-
? new BgzipIndexedFasta({
53-
fasta: new RemoteFile(fa, { fetch }),
54-
fai: new RemoteFile(fai, { fetch }),
55-
gzi: new RemoteFile(gzi, { fetch }),
81+
if (!sequenceAdapter) {
82+
const fastaHandle = new RemoteFile(fa, { fetch })
83+
const faiHandle = new RemoteFile(fai, { fetch })
84+
if (gzi) {
85+
const gziHandle = new RemoteFile(gzi, { fetch })
86+
sequenceAdapter = new BgzipIndexedFasta({
87+
fasta: fastaHandle,
88+
fai: faiHandle,
89+
gzi: gziHandle,
5690
})
57-
: new IndexedFasta({
58-
fasta: new RemoteFile(fa, { fetch }),
59-
fai: new RemoteFile(fai, { fetch }),
91+
adapterLRU.set(fa, {
92+
adapter: sequenceAdapter,
93+
fileHandles: [fastaHandle, faiHandle, gziHandle],
6094
})
95+
} else {
96+
sequenceAdapter = new IndexedFasta({
97+
fasta: fastaHandle,
98+
fai: faiHandle,
99+
})
100+
adapterLRU.set(fa, {
101+
adapter: sequenceAdapter,
102+
fileHandles: [fastaHandle, faiHandle],
103+
})
104+
}
105+
}
61106
const sequence = await sequenceAdapter.getSequence(name, start, end)
62107
if (sequence === undefined) {
63108
throw new Error('Sequence not found')
@@ -67,29 +112,37 @@ export class SequenceService {
67112

68113
if (assemblyDoc.fileIds?.fai) {
69114
const { fa: faId, fai: faiId, gzi: gziId } = assemblyDoc.fileIds
70-
const faDoc = await this.fileModel.findById(faId)
71-
if (!faDoc) {
72-
throw new Error(`No checksum for file document ${faId}`)
73-
}
115+
const adapterCacheEntry = adapterLRU.get(String(faId))
116+
let sequenceAdapter = adapterCacheEntry?.adapter
117+
if (!sequenceAdapter) {
118+
const faDoc = await this.fileModel.findById(faId)
119+
if (!faDoc) {
120+
throw new Error(`No checksum for file document ${faId}`)
121+
}
74122

75-
const faiDoc = await this.fileModel.findById(faiId)
76-
if (!faiDoc) {
77-
throw new Error(`File document not found for ${faiId}`)
78-
}
123+
const faiDoc = await this.fileModel.findById(faiId)
124+
if (!faiDoc) {
125+
throw new Error(`File document not found for ${faiId}`)
126+
}
79127

80-
const gziDoc = await this.fileModel.findById(gziId)
81-
if (!gziDoc) {
82-
throw new Error(`File document not found for ${gziId}`)
83-
}
128+
const gziDoc = await this.fileModel.findById(gziId)
129+
if (!gziDoc) {
130+
throw new Error(`File document not found for ${gziId}`)
131+
}
84132

85-
const fasta = this.filesService.getFileHandle(faDoc)
86-
const fai = this.filesService.getFileHandle(faiDoc)
87-
const gzi = gziId ? this.filesService.getFileHandle(gziDoc) : undefined
88-
const sequenceAdapter = gziId
89-
? new BgzipIndexedFasta({ fasta, fai, gzi })
90-
: new IndexedFasta({ fasta, fai })
133+
const fasta = this.filesService.getFileHandle(faDoc)
134+
const fai = this.filesService.getFileHandle(faiDoc)
135+
const gzi = gziId ? this.filesService.getFileHandle(gziDoc) : undefined
136+
sequenceAdapter = gziId
137+
? new BgzipIndexedFasta({ fasta, fai, gzi })
138+
: new IndexedFasta({ fasta, fai })
139+
const fileHandles = [fasta, fai]
140+
if (gzi) {
141+
fileHandles.push(gzi)
142+
}
143+
adapterLRU.set(String(faId), { adapter: sequenceAdapter, fileHandles })
144+
}
91145
const sequence = await sequenceAdapter.getSequence(name, start, end)
92-
await Promise.all([fasta.close(), fai.close(), gzi?.close()])
93146
if (sequence === undefined) {
94147
throw new Error('Sequence not found')
95148
}

packages/apollo-common/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
"@nestjs/common": "^10.1.0",
2525
"@nestjs/core": "^10.1.0",
2626
"@types/node": "^24.10.9",
27-
"generic-filehandle": "^3.0.0",
27+
"generic-filehandle2": "^2.0.18",
2828
"mongoose": "^6.12.0",
2929
"typescript": "^5.5.3"
3030
},

packages/apollo-common/src/Operation.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ import type {
1515
} from '@apollo-annotation/schemas'
1616
import type { GFF3Feature } from '@gmod/gff'
1717
import type { LoggerService } from '@nestjs/common'
18-
import type { GenericFilehandle } from 'generic-filehandle'
18+
import type { GenericFilehandle } from 'generic-filehandle2'
1919
import type { ClientSession, Model } from 'mongoose'
2020

2121
export interface LocalGFF3DataStore {

0 commit comments

Comments
 (0)