Skip to content

Commit 84bb567

Browse files
committed
connection to vlm
1 parent 812b7bc commit 84bb567

File tree

2 files changed

+39
-26
lines changed

2 files changed

+39
-26
lines changed

src/server/services/rag/fileStore.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { GetObjectCommand, DeleteObjectCommand, PutObjectCommand, ListObjectsV2Command, DeleteObjectsCommand, ListObjectsV2CommandOutput } from '@aws-sdk/client-s3'
22
import type { RagFile, RagIndex } from '../../db/models'
33
import { ApplicationError } from '../../util/ApplicationError'
4-
import { pdfToText } from '../../util/pdfToText'
4+
import { pdfToText, pdfToTextWithVLM } from '../../util/pdfToText'
55
import { S3_BUCKET } from '../../util/config'
66
import { s3Client } from '../../routes/rag/ragIndex'
77

@@ -81,11 +81,11 @@ export const FileStore = {
8181
try {
8282
const fileObj = await s3Client.send(new GetObjectCommand({ Bucket: S3_BUCKET, Key: s3Key }))
8383
const buf = await streamToBuffer(fileObj.Body)
84-
const text = await pdfToText(buf)
84+
const text = await pdfToTextWithVLM(buf)
8585
await s3Client.send(new PutObjectCommand({
8686
Bucket: S3_BUCKET,
8787
Key: pdfTextKey,
88-
Body: text,
88+
Body: JSON.stringify(text, null, 2),
8989
ContentType: 'text/plain',
9090
}))
9191
return text

src/server/util/pdfToText.ts

Lines changed: 36 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,15 @@
11
import pdf from 'pdf-parse-fork'
2-
import { REDIS_HOST, REDIS_PORT } from './config'
2+
import axios from 'axios'
3+
import { LAAMA_API_TOKEN, REDIS_HOST, REDIS_PORT } from './config'
34
import { Queue, QueueEvents, Worker, Job } from 'bullmq'
45

6+
// Shared axios instance for the service mounted under the `/dalai` path of the
// gateway URL below. The `params` default attaches `token` as a query-string
// parameter to every request issued through this client.
const dalaiClient = axios.create({
  baseURL: "https://api-gateway-toska.apps.ocp-bm-0.k8s.it.helsinki.fi/dalai",
  params: { token: LAAMA_API_TOKEN },
})
12+
513
export const pdfToText = async (fileBuffer: Buffer) => {
614
try {
715
const data = await pdf(fileBuffer)
@@ -13,32 +21,37 @@ export const pdfToText = async (fileBuffer: Buffer) => {
1321
}
1422
}
1523

16-
const connection = {
17-
host: REDIS_HOST,
18-
port: REDIS_PORT,
19-
}
24+
// const connection = {
25+
// host: REDIS_HOST,
26+
// port: REDIS_PORT,
27+
// }
2028

21-
const vlmQueue = new Queue('vlm-pdf-processing', { connection })
22-
const vlmQueueEvents = new QueueEvents('vlm-pdf-processing', { connection })
29+
// const vlmQueue = new Queue('vlm-pdf-processing', { connection })
30+
// const vlmQueueEvents = new QueueEvents('vlm-pdf-processing', { connection })
2331

24-
const vlmWorker = new Worker(
25-
'vlm-pdf-processing',
26-
async (job: Job) => {
27-
const { pdfBuffer } = job.data
28-
const result = await pdfToText(pdfBuffer)
32+
// const vlmWorker = new Worker(
33+
// 'vlm-pdf-processing',
34+
// async (job: Job) => {
35+
// const { pdfBuffer } = job.data
36+
// const result = await dalaiClient.post(pdfBuffer)
2937

30-
return result
31-
},
32-
{ connection, autorun: false },
33-
)
38+
// return result
39+
// },
40+
// { connection, autorun: false },
41+
// )
3442

3543
/**
 * Sends a PDF to the `/scan` endpoint reachable through `dalaiClient` and
 * returns whatever the service responds with.
 *
 * The PDF bytes are uploaded as a multipart form containing a single `file`
 * field (filename `file.pdf`, MIME type `application/pdf`).
 *
 * @param fileBuffer - Raw bytes of the PDF document to scan.
 * @returns The response body (`response.data`) exactly as delivered by the
 *   client. NOTE(review): the shape of this payload is not visible from this
 *   function — confirm against the service before relying on specific fields.
 * @throws Any error raised by `dalaiClient.post` propagates to the caller
 *   unchanged; nothing is caught here.
 */
export const pdfToTextWithVLM = async (fileBuffer: Buffer) => {
  const pdfBlob = new Blob([fileBuffer], { type: 'application/pdf' })

  const form = new FormData()
  form.append('file', pdfBlob, 'file.pdf')

  // No per-request config is passed: the previous `{ headers: {} }` argument
  // added nothing to the request, and the `@ts-ignore` guarding it would have
  // hidden any genuine type error introduced on that line later.
  const response = await dalaiClient.post('/scan', form)

  return response.data
}

0 commit comments

Comments
 (0)