Skip to content

Commit adfb67f

Browse files
committed
do not use vlm
1 parent 84bb567 commit adfb67f

File tree

1 file changed

+29
-20
lines changed

1 file changed

+29
-20
lines changed

src/server/services/rag/fileStore.ts

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,11 @@
1-
import { GetObjectCommand, DeleteObjectCommand, PutObjectCommand, ListObjectsV2Command, DeleteObjectsCommand, ListObjectsV2CommandOutput } from '@aws-sdk/client-s3'
1+
import {
2+
GetObjectCommand,
3+
DeleteObjectCommand,
4+
PutObjectCommand,
5+
ListObjectsV2Command,
6+
DeleteObjectsCommand,
7+
ListObjectsV2CommandOutput,
8+
} from '@aws-sdk/client-s3'
29
import type { RagFile, RagIndex } from '../../db/models'
310
import { ApplicationError } from '../../util/ApplicationError'
411
import { pdfToText, pdfToTextWithVLM } from '../../util/pdfToText'
@@ -27,16 +34,16 @@ export const FileStore = {
2734
Prefix: prefix,
2835
ContinuationToken: continuationToken,
2936
})
30-
const listResponse = await s3Client.send(listCommand) as ListObjectsV2CommandOutput
31-
const keys = (listResponse.Contents || []).map(obj => ({ Key: obj.Key! }))
37+
const listResponse = (await s3Client.send(listCommand)) as ListObjectsV2CommandOutput
38+
const keys = (listResponse.Contents || []).map((obj) => ({ Key: obj.Key! }))
3239

3340
if (keys.length > 0) {
3441
const deleteCommand = new DeleteObjectsCommand({
3542
Bucket: S3_BUCKET,
36-
Delete: { Objects: keys }
43+
Delete: { Objects: keys },
3744
})
3845
const deleteResponse = await s3Client.send(deleteCommand)
39-
console.log("Deleted:", deleteResponse.Deleted?.length, "objects.")
46+
console.log('Deleted:', deleteResponse.Deleted?.length, 'objects.')
4047
}
4148

4249
continuationToken = listResponse.NextContinuationToken
@@ -81,13 +88,15 @@ export const FileStore = {
8188
try {
8289
const fileObj = await s3Client.send(new GetObjectCommand({ Bucket: S3_BUCKET, Key: s3Key }))
8390
const buf = await streamToBuffer(fileObj.Body)
84-
const text = await pdfToTextWithVLM(buf)
85-
await s3Client.send(new PutObjectCommand({
86-
Bucket: S3_BUCKET,
87-
Key: pdfTextKey,
88-
Body: JSON.stringify(text, null, 2),
89-
ContentType: 'text/plain',
90-
}))
91+
const text = await pdfToText(buf)
92+
await s3Client.send(
93+
new PutObjectCommand({
94+
Bucket: S3_BUCKET,
95+
Key: pdfTextKey,
96+
Body: JSON.stringify(text, null, 2),
97+
ContentType: 'text/plain',
98+
}),
99+
)
91100
return text
92101
} catch (error) {
93102
console.error(`Failed to create PDF text file ${pdfTextKey} in S3:`, error)
@@ -114,12 +123,14 @@ export const FileStore = {
114123

115124
async saveText(s3Key: string, text: string) {
116125
try {
117-
await s3Client.send(new PutObjectCommand({
118-
Bucket: S3_BUCKET,
119-
Key: s3Key,
120-
Body: text,
121-
ContentType: 'text/plain',
122-
}))
126+
await s3Client.send(
127+
new PutObjectCommand({
128+
Bucket: S3_BUCKET,
129+
Key: s3Key,
130+
Body: text,
131+
ContentType: 'text/plain',
132+
}),
133+
)
123134
} catch (error) {
124135
console.error(`Failed to save text content to ${s3Key} in S3:`, error)
125136
throw ApplicationError.InternalServerError(`Failed to save text content`)
@@ -136,7 +147,6 @@ const streamToString = (stream: any): Promise<string> => {
136147
})
137148
}
138149

139-
140150
const streamToBuffer = (stream: any): Promise<Buffer> => {
141151
return new Promise((resolve, reject) => {
142152
const chunks: any[] = []
@@ -145,4 +155,3 @@ const streamToBuffer = (stream: any): Promise<Buffer> => {
145155
stream.on('end', () => resolve(Buffer.concat(chunks)))
146156
})
147157
}
148-

0 commit comments

Comments
 (0)