@@ -117,6 +117,25 @@ const worker = new Worker(
117
117
async ( job ) => {
118
118
const { s3Bucket, s3Key, outputBucket } = job . data || { }
119
119
120
/**
 * Overall job progress as a percentage, always kept within 0 to 100.
 */
let _progress = 0
/**
 * Adds the completed share of one section to the overall progress and reports
 * the new total through `job.updateProgress`. Reporting is best-effort: a
 * failed update is logged and never interrupts the job.
 *
 * @param progress fraction (0-1) of the current section completed by this call
 * @param sectionSize the size of the section as a percentage of the whole job (0-100). All section sizes should add up to 100.
 */
const incrementProgress = (progress: number, sectionSize: number): void => {
  const delta = progress * sectionSize
  // A NaN/Infinity increment would poison the total for the rest of the job, so skip it.
  if (Number.isFinite(delta)) {
    // Many small per-page fractions accumulate floating-point error; clamp so the
    // reported total can never drift outside the 0-100 range.
    _progress = Math.min(100, Math.max(0, _progress + delta))
  }
  job
    .updateProgress({
      // The job payload may be absent, so read the id defensively.
      ragFileId: job.data?.ragFileId,
      progress: _progress,
    })
    .catch((err: unknown) => {
      const reason = err instanceof Error ? err.message : String(err)
      console.warn(`Job ${job.id}: failed to report progress: ${reason}`)
    })
}
138
+
120
139
console . log ( `Processing job ${ job . id } ` )
121
140
122
141
if ( ! s3Bucket || ! s3Key ) {
@@ -146,6 +165,8 @@ const worker = new Worker(
146
165
await fs . mkdir ( outputTextDir , { recursive : true } )
147
166
await fs . mkdir ( outputImagesDir , { recursive : true } )
148
167
168
+ incrementProgress ( 1 , 1 ) // 1% - Setup directories
169
+
149
170
/**
150
171
* Download the pdf
151
172
*/
@@ -155,6 +176,8 @@ const worker = new Worker(
155
176
throw new Error ( `Failed to download s3://${ s3Bucket } /${ s3Key } : ${ err . message || err } ` )
156
177
}
157
178
179
+ incrementProgress ( 1 , 1 ) // 1% - Download PDF
180
+
158
181
/**
159
182
* Convert PDF pages to text
160
183
*/
@@ -202,6 +225,8 @@ const worker = new Worker(
202
225
throw new Error ( 'PDF to text conversion failed' )
203
226
}
204
227
228
+ incrementProgress ( 1 , 2 ) // 2% - PDF to text
229
+
205
230
/**
206
231
* Convert PDF pages to PNG images
207
232
*/
@@ -216,6 +241,8 @@ const worker = new Worker(
216
241
throw new Error ( 'PDF to PNG conversion failed' )
217
242
}
218
243
244
+ incrementProgress ( 1 , 6 ) // 6% - PDF to PNGs. Total so far: 10%
245
+
219
246
/**
220
247
* Transcription & Markdown Generation (with Ollama health/retry, fallback to PDF text)
221
248
*/
@@ -268,6 +295,10 @@ const worker = new Worker(
268
295
const txt = data ?. response || ''
269
296
await fs . writeFile ( existingTxtPath , txt , 'utf-8' )
270
297
console . log ( `Job ${ job . id } : transcription complete for page ${ pngPage . pageNumber } /${ pngPages . length } ` )
298
+
299
+ const pageProgress = 0.5 / pngPages . length // Halfway through the page processing
300
+ incrementProgress ( pageProgress , 87 ) // 87% - VLM & Markdown
301
+
271
302
return txt
272
303
} , RETRY_COUNT )
273
304
finalText = await retryOllamaCall ( async ( ) => {
@@ -311,6 +342,7 @@ const worker = new Worker(
311
342
}
312
343
await fs . writeFile ( existingMdPath , text , 'utf-8' )
313
344
console . log ( `Job ${ job . id } : markdown generation complete for page ${ pngPage . pageNumber } /${ pngPages . length } ` )
345
+
314
346
return text
315
347
} , RETRY_COUNT )
316
348
} catch ( error ) {
@@ -320,6 +352,9 @@ const worker = new Worker(
320
352
await fs . writeFile ( existingMdPath , finalText , 'utf-8' )
321
353
}
322
354
355
+ const pageProgress = 0.5 / pngPages . length // Second half of the page processing done
356
+ incrementProgress ( pageProgress , 87 ) // 87% - VLM & Markdown. Total so far: 97%
357
+
323
358
resultingMarkdown += `\n\n${ finalText } `
324
359
}
325
360
@@ -336,6 +371,8 @@ const worker = new Worker(
336
371
throw new Error ( `Failed uploading outputs to s3://${ outputBucket } : ${ err . message || err } ` )
337
372
}
338
373
374
+ incrementProgress ( 1 , 3 ) // 97 + 3 = 100% - Upload results
375
+
339
376
return {
340
377
input : { bucket : s3Bucket , key : s3Key } ,
341
378
output : { bucket : outputBucket } ,
0 commit comments