@@ -3,10 +3,9 @@ import path from 'node:path'
3
3
import fs from 'node:fs/promises'
4
4
import { S3Client , GetObjectCommand , PutObjectCommand } from '@aws-sdk/client-s3'
5
5
import { pipeline } from 'node:stream'
6
- import { createWriteStream , createReadStream } from 'node:fs'
7
- import Redis from 'ioredis'
8
- import { pdfToPng } from 'pdf-to-png-converter'
9
- import { v4 as uuidv4 } from 'uuid'
6
+ import { createWriteStream } from 'node:fs'
7
+ import Redis , { RedisOptions } from 'ioredis'
8
+ import { pdfToPng , PngPageOutput } from 'pdf-to-png-converter'
10
9
import { promisify } from 'node:util'
11
10
import pdfToText from 'pdf-parse-fork'
12
11
import dotenv from 'dotenv'
@@ -15,18 +14,18 @@ dotenv.config()
15
14
16
15
const pipelineAsync = promisify ( pipeline )
17
16
18
/**
 * Download a single S3 object and stream it into a local file.
 *
 * The parent directory of `destPath` is created recursively before the
 * write stream is opened.
 *
 * @param s3 - Client used to send the `GetObjectCommand`.
 * @param bucket - Name of the bucket that holds the object.
 * @param key - Key of the object inside the bucket.
 * @param destPath - Local file path the object body is written to.
 * @throws Error when the response carries no body; also rejects with
 *   whatever `s3.send`, `fs.mkdir` or the stream pipeline reject with.
 */
const downloadS3ToFile = async (
  s3: S3Client,
  bucket: string,
  key: string,
  destPath: string,
): Promise<void> => {
  const res = await s3.send(new GetObjectCommand({ Bucket: bucket, Key: key }))

  // `Body` is optional on the GetObject output; fail with a clear message
  // instead of handing `undefined` to the stream pipeline.
  if (!res.Body) {
    throw new Error(`S3 object s3://${bucket}/${key} returned an empty body`)
  }

  await fs.mkdir(path.dirname(destPath), { recursive: true })
  await pipelineAsync(res.Body as NodeJS.ReadableStream, createWriteStream(destPath))
}
23
22
24
/**
 * Upload a local file to S3 as a single object.
 *
 * The file is read completely into memory and sent with one
 * `PutObjectCommand`.
 *
 * @param s3 - Client used to send the command.
 * @param bucket - Destination bucket name.
 * @param key - Destination object key.
 * @param filePath - Local path of the file to upload.
 * @param contentType - Value stored as the object's `ContentType`.
 */
const uploadFileToS3 = async (s3, bucket, key, filePath, contentType) => {
  const fileContents = await fs.readFile(filePath)
  const putRequest = new PutObjectCommand({
    Bucket: bucket,
    Key: key,
    Body: fileContents,
    ContentType: contentType,
  })
  await s3.send(putRequest)
}
28
27
29
- async function pathExists ( p ) {
28
+ const pathExists = async ( p ) => {
30
29
try {
31
30
await fs . access ( p )
32
31
return true
@@ -35,7 +34,7 @@ async function pathExists(p) {
35
34
}
36
35
}
37
36
38
- function guessContentType ( filePath ) {
37
+ const guessContentType = ( filePath : string ) => {
39
38
const ext = path . extname ( filePath ) . toLowerCase ( )
40
39
// A media-type parameter must be separated from the type by ';',
// otherwise the Content-Type value is malformed.
if (ext === '.txt') return 'text/plain; charset=utf-8'
41
40
if ( ext === '.json' ) return 'application/json'
@@ -49,19 +48,19 @@ function guessContentType(filePath) {
49
48
50
49
// --- Config ---
51
50
52
// Redis endpoint, read from the environment.
const REDIS_HOST = process.env.REDIS_HOST ?? ''
// Parse the port as base-10 and fall back to 6379 when the variable is
// unset or not numeric, so NaN is never handed to the Redis client.
const REDIS_PORT = Number.parseInt(process.env.REDIS_PORT ?? '6379', 10) || 6379
// TLS material; an empty string means "not provided".
const CA = process.env.CA ?? ''
const CERT = process.env.CERT ?? ''
const KEY = process.env.KEY ?? ''

// Base connection options. Declared with `let` because the object is
// replaced further down when a CA is configured.
let creds: RedisOptions = {
  host: REDIS_HOST,
  port: REDIS_PORT,
  maxRetriesPerRequest: null,
}
63
62
64
- if ( CA !== undefined ) {
63
+ if ( CA . length > 0 ) {
65
64
creds = {
66
65
...creds ,
67
66
tls : {
@@ -77,12 +76,12 @@ const RETRY_COUNT = 1
77
76
78
77
// Redis connection built from `creds`.
const connection = new Redis(creds)

// Queue name, with a default when LLAMA_SCAN_QUEUE is unset.
const QUEUE_NAME = process.env['LLAMA_SCAN_QUEUE'] ?? 'llama-scan-queue'

// S3 endpoint and credentials; each falls back to an empty string.
const S3_HOST = process.env['S3_HOST'] ?? ''
const S3_ACCESS_KEY = process.env['S3_ACCESS_KEY'] ?? ''
const S3_SECRET_ACCESS_KEY = process.env['S3_SECRET_ACCESS_KEY'] ?? ''

// Ollama endpoint: LAAMA_API_URL takes precedence over OLLAMA_URL.
// The result is undefined when neither variable is set.
const OLLAMA_URL = process.env['LAAMA_API_URL'] ?? process.env['OLLAMA_URL']
const LAAMA_API_TOKEN = process.env['LAAMA_API_TOKEN'] ?? ''
86
85
87
86
const s3 = new S3Client ( {
88
87
region : 'eu-north-1' ,
@@ -94,8 +93,8 @@ const s3 = new S3Client({
94
93
} ,
95
94
} )
96
95
97
- async function retryOllamaCall ( fn , maxRetries = 3 ) {
98
- let lastError
96
+ const retryOllamaCall = async ( fn : Function , maxRetries = 3 ) => {
97
+ let lastError : Error | undefined = undefined
99
98
for ( let i = 0 ; i < maxRetries ; i ++ ) {
100
99
// Health check before each attempt
101
100
try {
@@ -128,7 +127,7 @@ const worker = new Worker(
128
127
throw new Error ( 'outputBucket is required in job data' )
129
128
}
130
129
131
// Flatten the job id into one path segment by turning every '/' into '_'.
// This needs a regex literal with the g flag: the quoted string '\//g'
// would only match the literal text "//g" and leave slashes in place.
// NOTE(review): a missing id yields '' here, which makes jobRootDir equal
// to the uploads root — confirm that is acceptable.
const jobIdPath = job?.id?.replace(/\//g, '_') ?? ''
132
131
133
132
const uploadsDir = './uploads'
134
133
const jobRootDir = path . join ( uploadsDir , jobIdPath )
@@ -156,7 +155,7 @@ const worker = new Worker(
156
155
/**
157
156
* Convert PDF pages to text
158
157
*/
159
- function pagerender ( pageData ) {
158
+ const pagerender = ( pageData ) => {
160
159
let render_options = {
161
160
normalizeWhitespace : false ,
162
161
disableCombineTextItems : false ,
@@ -199,12 +198,12 @@ const worker = new Worker(
199
198
/**
200
199
* Convert PDF pages to PNG images
201
200
*/
202
- let pngPages
201
+ let pngPages : PngPageOutput [ ]
203
202
try {
204
203
pngPages = await pdfToPng ( inputLocalPath , {
205
204
outputFileMaskFunc : ( pageNumber ) => `page_${ pageNumber } .png` ,
206
205
outputFolder : outputImagesDir ,
207
- } ) ;
206
+ } )
208
207
} catch ( error ) {
209
208
console . error ( `Job ${ job . id } failed: PDF to PNG conversion failed` , error )
210
209
throw new Error ( 'PDF to PNG conversion failed' )
@@ -294,8 +293,8 @@ const worker = new Worker(
294
293
text = text . replace ( / ^ ` ` ` m a r k d o w n / , '' ) . replace ( / ` ` ` $ / , '' ) . trim ( )
295
294
}
296
295
// Add page number to the end of the first line if it's a heading
297
/**
 * Insert `suffix` at the end of the first line of `content`.
 *
 * The first line ends at the first `\r` or `\n`; when there is no line
 * break the suffix is appended to the whole string.
 *
 * @param content - Text whose first line is extended.
 * @param suffix - Text inserted verbatim after the first line.
 * @returns The text with `suffix` placed before the first line break.
 */
const appendToFirstLine = (content: string, suffix: string) => {
  const firstBreak = content.search(/[\r\n]/)
  if (firstBreak === -1) {
    return content + suffix
  }
  return content.slice(0, firstBreak) + suffix + content.slice(firstBreak)
}
300
299
if ( text . trim ( ) . startsWith ( '#' ) ) {
301
300
text = appendToFirstLine ( text , ` (Page ${ pngPage . pageNumber } )` )
@@ -343,15 +342,15 @@ const worker = new Worker(
343
342
344
343
// Announce start-up and the queue this worker consumes.
console.log(`Worker started. Listening to queue "${QUEUE_NAME}"...`)

// Log each job that finishes successfully.
worker.on('completed', (job) => {
  const finishedId = job.id
  console.log(`Job ${finishedId} completed.`)
})

// Log each failed job together with its error. The id is read through
// optional chaining, so an absent job prints as "undefined".
worker.on('failed', (job, err) => {
  const failedId = job?.id
  console.error(`Job ${failedId} failed:`, err)
})
353
352
354
- async function shutdown ( ) {
353
+ const shutdown = async ( ) => {
355
354
console . log ( 'Shutting down worker...' )
356
355
try { await worker . close ( ) } catch { }
357
356
process . exit ( 0 )
0 commit comments