Skip to content

Commit fc386dd

Browse files
committed
Support PNG files in advanced parsing
1 parent ac09de2 commit fc386dd

File tree

4 files changed

+19
-11
lines changed

4 files changed

+19
-11
lines changed

src/client/components/Rag/RagFileDetails.tsx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ export const RagFileInfo: React.FC<{
7474
const { t, i18n } = useTranslation()
7575
const usedAdvancedParsing = !!(file.metadata as Record<string, unknown> | null)?.advancedParsing
7676
const isPdf = file.fileType === 'application/pdf'
77+
const isImage = file.fileType === 'image/png'
7778

7879
const pipelineStage = status?.pipelineStage ?? file.pipelineStage
7980

@@ -100,7 +101,7 @@ export const RagFileInfo: React.FC<{
100101
<HideOnSmall>{file.fileType}</HideOnSmall>
101102
<TableCell>{(file.fileSize / 1024).toFixed()} kB</TableCell>
102103
<TableCell>
103-
{isPdf && (
104+
{(isPdf || isImage) && (
104105
<Box display="flex" alignItems="center" gap={0.5}>
105106
<Typography variant="body2">
106107
{usedAdvancedParsing ? t('rag:advancedParsing') : t('rag:standardParsing')}

src/server/services/jobs/pdfParsing.job.ts

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -110,19 +110,24 @@ type VLMJobData = {
110110
text: string
111111
}
112112

113+
const isImage = (ragFile: RagFile) => ragFile.fileType === 'image/png'
114+
113115
/**
114116
* Adds advanced parsing jobs for the file (PDF or PNG) to the queue. The file must be uploaded to S3 beforehand. The jobId is based on the ragFile - resubmitting with the same jobId while the previous job is running has no effect.
115117
* @param ragFile
116118
* @returns the pages, as an array of PageInfo objects
117119
*/
118-
export const submitAdvancedPdfParsingJobs = async (ragFile: RagFile) => {
119-
const pdfBytes = await FileStore.readRagFileContextToBytes(ragFile)
120+
export const submitAdvancedParsingJobs = async (ragFile: RagFile) => {
121+
const fileBytes = await FileStore.readRagFileContextToBytes(ragFile)
120122

121-
if (!pdfBytes) {
122-
console.error(`Failed to read PDF text file ${ragFile.filename} in S3`)
123-
throw ApplicationError.InternalServerError('Failed to read PDF text file')
123+
if (!fileBytes) {
124+
console.error(`Failed to read file ${ragFile.filename} in S3`)
125+
throw ApplicationError.InternalServerError('Failed to read file')
124126
}
125-
const pages = await analyzeAndPreparePDFPages(pdfBytes)
127+
128+
const pages = isImage(ragFile)
129+
? [{ text: '', png: fileBytes } as PageInfo]
130+
: await analyzeAndPreparePDFPages(fileBytes)
126131

127132
const baseJobId = crypto.randomBytes(20).toString('hex')
128133

src/server/services/rag/fileStore.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ import { S3_BUCKET } from '../../util/config'
1313
import { s3Client } from '../../util/s3client'
1414

1515
const isPdf = (ragFile: RagFile) => ragFile.fileType === 'application/pdf'
16+
const isImage = (ragFile: RagFile) => ragFile.fileType === 'image/png'
17+
const isBinaryFile = (ragFile: RagFile) => isPdf(ragFile) || isImage(ragFile)
1618
const getPdfTextKey = (s3Key: string) => `${s3Key}.md`
1719

1820
export const FileStore = {
@@ -62,7 +64,7 @@ export const FileStore = {
6264
const s3Key = FileStore.getRagFileKey(ragFile)
6365

6466
try {
65-
if (isPdf(ragFile)) {
67+
if (isBinaryFile(ragFile)) {
6668
const pdfTextKey = getPdfTextKey(s3Key)
6769
await s3Client.send(new DeleteObjectCommand({ Bucket: S3_BUCKET, Key: pdfTextKey }))
6870
return true
@@ -90,7 +92,7 @@ export const FileStore = {
9092
async readRagFileTextContent(ragFile: RagFile) {
9193
const s3Key = FileStore.getRagFileKey(ragFile)
9294

93-
if (isPdf(ragFile)) {
95+
if (isBinaryFile(ragFile)) {
9496
const pdfTextKey = getPdfTextKey(s3Key)
9597
try {
9698
const textObj = await s3Client.send(new GetObjectCommand({ Bucket: S3_BUCKET, Key: pdfTextKey }))

src/server/services/rag/ingestion.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { Document } from '@langchain/core/documents'
22
import { MarkdownTextSplitter, RecursiveCharacterTextSplitter } from '@langchain/textsplitters'
33
import { RagFile, type RagIndex } from '../../db/models'
4-
import { pdfQueueEvents, simplyParsePdf, submitAdvancedPdfParsingJobs } from '../jobs/pdfParsing.job'
4+
import { pdfQueueEvents, simplyParsePdf, submitAdvancedParsingJobs } from '../jobs/pdfParsing.job'
55
import { FileStore } from './fileStore'
66
import logger from 'src/server/util/logger'
77
import type { IngestionJobStatus, IngestionPipelineStageKey } from '@shared/ingestion'
@@ -67,7 +67,7 @@ export const ingestRagFile = async (ragFile: RagFile, ragIndex: RagIndex) => {
6767

6868
if (needToParseWithVlm) {
6969
// Advanced parsing with job processing.
70-
const pages = await submitAdvancedPdfParsingJobs(ragFile)
70+
const pages = await submitAdvancedParsingJobs(ragFile)
7171

7272
try {
7373
const start = 5

0 commit comments

Comments
 (0)