@@ -3,9 +3,10 @@ import path from 'node:path'
3
3
import fs from 'node:fs/promises'
4
4
import { S3Client , GetObjectCommand , PutObjectCommand } from '@aws-sdk/client-s3'
5
5
import { pipeline } from 'node:stream'
6
- import { createWriteStream } from 'node:fs'
7
- import Redis , { RedisOptions } from 'ioredis'
8
- import { pdfToPng , PngPageOutput } from 'pdf-to-png-converter'
6
+ import { createWriteStream , createReadStream } from 'node:fs'
7
+ import Redis from 'ioredis'
8
+ import { pdfToPng } from 'pdf-to-png-converter'
9
+ import { v4 as uuidv4 } from 'uuid'
9
10
import { promisify } from 'node:util'
10
11
import pdfToText from 'pdf-parse-fork'
11
12
import dotenv from 'dotenv'
@@ -14,18 +15,18 @@ dotenv.config()
14
15
15
16
// Promise-returning wrapper around the callback-style stream `pipeline`, so a stream copy can be awaited.
const pipelineAsync = promisify ( pipeline )
16
17
17
/**
 * Download a single S3 object and write it to a local file.
 *
 * The destination's parent directory is created (recursively) before the
 * object body is streamed to disk.
 *
 * @param s3 Client used to send the GetObject request
 * @param bucket Name of the bucket to read from
 * @param key Key of the object to download
 * @param destPath Local file path the object body is written to
 * @throws Error when the response carries no body; S3, filesystem and stream
 *         failures are propagated as rejections
 */
async function downloadS3ToFile(s3, bucket, key, destPath) {
  const res = await s3.send(new GetObjectCommand({ Bucket: bucket, Key: key }))

  // Fail with a clear message instead of handing an undefined source to the pipeline
  if (!res.Body) {
    throw new Error(`S3 object s3://${bucket}/${key} returned no body`)
  }

  await fs.mkdir(path.dirname(destPath), { recursive: true })
  await pipelineAsync(res.Body, createWriteStream(destPath))
}
22
23
23
/**
 * Upload a local file to S3 as one object.
 *
 * The file is read fully into memory and sent in a single PutObject request.
 *
 * @param s3 Client used to send the PutObject request
 * @param bucket Name of the destination bucket
 * @param key Key the object is stored under
 * @param filePath Local path of the file to upload
 * @param contentType Value sent as the object's ContentType
 */
async function uploadFileToS3(s3, bucket, key, filePath, contentType) {
  const contents = await fs.readFile(filePath)
  const putRequest = new PutObjectCommand({
    Bucket: bucket,
    Key: key,
    Body: contents,
    ContentType: contentType,
  })
  await s3.send(putRequest)
}
27
28
28
- const pathExists = async ( p ) => {
29
+ async function pathExists ( p ) {
29
30
try {
30
31
await fs . access ( p )
31
32
return true
@@ -34,7 +35,7 @@ const pathExists = async (p) => {
34
35
}
35
36
}
36
37
37
- const guessContentType = ( filePath : string ) => {
38
+ function guessContentType ( filePath ) {
38
39
const ext = path . extname ( filePath ) . toLowerCase ( )
39
40
// A media-type parameter must be separated from the type by ';'
if (ext === '.txt') return 'text/plain; charset=utf-8'
40
41
if ( ext === '.json' ) return 'application/json'
@@ -48,19 +49,19 @@ const guessContentType = (filePath: string) => {
48
49
49
50
// --- Config ---
50
51
51
// Redis connection settings, taken as-is from the environment (may be undefined)
const { REDIS_HOST, REDIS_PORT, CERT, KEY } = process.env
// An empty CA is normalised to undefined so the check that follows can compare against undefined
const CA = process.env.CA || undefined

// Base connection options; `let` because the object is replaced when a CA is configured
let creds = {
  host: REDIS_HOST,
  port: REDIS_PORT,
  maxRetriesPerRequest: null,
}
62
63
63
- if ( CA . length > 0 ) {
64
+ if ( CA !== undefined ) {
64
65
creds = {
65
66
...creds ,
66
67
tls : {
@@ -76,12 +77,12 @@ const RETRY_COUNT = 1
76
77
77
78
const connection = new Redis ( creds )
78
79
79
// Queue name; falls back to the default when the variable is unset or empty
const QUEUE_NAME = process.env.LLAMA_SCAN_QUEUE || 'llama-scan-queue'

// S3 endpoint and credentials (credentials stay undefined when not provided)
const S3_HOST = process.env.S3_HOST || ''
const S3_ACCESS_KEY = process.env.S3_ACCESS_KEY
const S3_SECRET_ACCESS_KEY = process.env.S3_SECRET_ACCESS_KEY

// LAAMA_API_URL takes precedence over OLLAMA_URL
const OLLAMA_URL = process.env.LAAMA_API_URL ?? process.env.OLLAMA_URL
// Must be read from process.env: `process.LAAMA_API_TOKEN` does not exist and always yielded ''
const LAAMA_API_TOKEN = process.env.LAAMA_API_TOKEN ?? ''
85
86
86
87
const s3 = new S3Client ( {
87
88
region : 'eu-north-1' ,
@@ -93,8 +94,8 @@ const s3 = new S3Client({
93
94
} ,
94
95
} )
95
96
96
- const retryOllamaCall = async ( fn : Function , maxRetries = 3 ) => {
97
- let lastError : Error | undefined = undefined
97
+ async function retryOllamaCall ( fn , maxRetries = 3 ) {
98
+ let lastError
98
99
for ( let i = 0 ; i < maxRetries ; i ++ ) {
99
100
// Health check before each attempt
100
101
try {
@@ -127,7 +128,7 @@ const worker = new Worker(
127
128
throw new Error ( 'outputBucket is required in job data' )
128
129
}
129
130
130
- const jobIdPath = job ? .id ?. replace ( '\//g ' , '_' ) ?? ''
131
+ const jobIdPath = job . id . replaceAll ( '\/' , '_' )
131
132
132
133
const uploadsDir = './uploads'
133
134
const jobRootDir = path . join ( uploadsDir , jobIdPath )
@@ -155,7 +156,7 @@ const worker = new Worker(
155
156
/**
156
157
* Convert PDF pages to text
157
158
*/
158
- const pagerender = ( pageData ) => {
159
+ function pagerender ( pageData ) {
159
160
let render_options = {
160
161
normalizeWhitespace : false ,
161
162
disableCombineTextItems : false ,
@@ -198,12 +199,12 @@ const worker = new Worker(
198
199
/**
199
200
* Convert PDF pages to PNG images
200
201
*/
201
- let pngPages : PngPageOutput [ ]
202
+ let pngPages
202
203
try {
203
204
pngPages = await pdfToPng ( inputLocalPath , {
204
205
outputFileMaskFunc : ( pageNumber ) => `page_${ pageNumber } .png` ,
205
206
outputFolder : outputImagesDir ,
206
- } )
207
+ } ) ;
207
208
} catch ( error ) {
208
209
console . error ( `Job ${ job . id } failed: PDF to PNG conversion failed` , error )
209
210
throw new Error ( 'PDF to PNG conversion failed' )
@@ -293,8 +294,8 @@ const worker = new Worker(
293
294
text = text . replace ( / ^ ` ` ` m a r k d o w n / , '' ) . replace ( / ` ` ` $ / , '' ) . trim ( )
294
295
}
295
296
// Add page number to the end of the first line if it's a heading
296
/**
 * Append `suffix` to the end of the first non-blank line of `content`.
 *
 * Leading whitespace and blank lines are skipped, so the suffix lands on the
 * line that actually holds text (the caller decides "is a heading" on the
 * trimmed text but passes the untrimmed text in).
 *
 * @param content Text whose first non-blank line is extended
 * @param suffix Text appended verbatim to that line
 * @returns `content` with `suffix` inserted before the first line break that
 *          follows the first non-blank line
 */
function appendToFirstLine(content, suffix) {
  // A function replacer keeps `$` sequences in `suffix` literal
  return content.replace(/^\s*[^\r\n]*/, (match) => match + suffix)
}
299
300
if ( text . trim ( ) . startsWith ( '#' ) ) {
300
301
text = appendToFirstLine ( text , ` (Page ${ pngPage . pageNumber } )` )
@@ -342,15 +343,15 @@ const worker = new Worker(
342
343
343
344
console . log ( `Worker started. Listening to queue "${ QUEUE_NAME } "...` )
344
345
345
// Log every job that finishes successfully (the job result is not used)
worker.on('completed', (job) => {
  console.log(`Job ${job.id} completed.`)
})

// Log every failed job together with its error; `job` is read with ?. as it may be absent
worker.on('failed', (job, err) => {
  console.error(`Job ${job?.id} failed:`, err)
})
352
353
353
- const shutdown = async ( ) => {
354
+ async function shutdown ( ) {
354
355
console . log ( 'Shutting down worker...' )
355
356
try { await worker . close ( ) } catch { }
356
357
process . exit ( 0 )
0 commit comments