Skip to content

Commit cb35bc5

Browse files
committed
Ability to retry file parsing
1 parent 08bc89a commit cb35bc5

File tree

7 files changed

+99
-27
lines changed

7 files changed

+99
-27
lines changed

src/client/components/Rag/RagFile.tsx

Lines changed: 40 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,16 @@ import { useQuery } from '@tanstack/react-query'
22
import { useParams, Link as RouterLink, useNavigate } from 'react-router-dom'
33
import apiClient from '../../util/apiClient'
44
import type { RagFileAttributes } from '../../../shared/types'
5-
import { Button, Container, Link, Typography } from '@mui/material'
5+
import { Box, Button, Container, Link, Typography } from '@mui/material'
66
import { RagFileInfo } from './RagFileDetails'
77
import type { RagIndexAttributes } from '../../../server/db/models/ragIndex'
88
import { Chunk } from './Chunk'
9-
import { useDeleteRagFileMutation } from './api'
9+
import { useDeleteRagFileMutation, useDeleteRagFileTextMutation } from './api'
1010
import { useTranslation } from 'react-i18next'
1111
import Markdown from 'react-markdown'
1212
import { enqueueSnackbar } from 'notistack'
13+
import { OutlineButtonBlack } from '../ChatV2/general/Buttons'
14+
import { Autorenew } from '@mui/icons-material'
1315

1416
type RagFile = RagFileAttributes & {
1517
fileContent: string
@@ -32,6 +34,7 @@ export const RagFile: React.FC = () => {
3234
},
3335
})
3436
const deleteMutation = useDeleteRagFileMutation()
37+
const deleteTextMutation = useDeleteRagFileTextMutation()
3538
const navigate = useNavigate()
3639

3740
if (isError) {
@@ -51,24 +54,42 @@ export const RagFile: React.FC = () => {
5154
<Typography variant="h3">
5255
{ragFile.ragIndex.metadata?.name} / {ragFile.filename}
5356
</Typography>
54-
<Button
55-
variant="text"
56-
color="error"
57-
sx={{ my: 2 }}
58-
onClick={async () => {
59-
if (window.confirm('Are you sure you want to delete this file?')) {
60-
await deleteMutation.mutateAsync({
61-
indexId: ragFile.ragIndex.id,
62-
fileId: ragFile.id,
63-
})
64-
enqueueSnackbar(t('rag:fileDeleted'), { variant: 'success' })
57+
<Box sx={{ my: 2, display: 'flex', gap: 2 }}>
58+
{ragFile.fileType === 'application/pdf' && (
59+
<OutlineButtonBlack
60+
startIcon={<Autorenew />}
61+
onClick={async () => {
62+
await deleteTextMutation.mutateAsync({
63+
indexId: ragFile.ragIndex.id,
64+
fileId: ragFile.id,
65+
})
66+
enqueueSnackbar(t('rag:fileTextDeleted'), { variant: 'success' })
6567

66-
navigate(`/rag/${ragFile.ragIndex.id}`)
67-
}
68-
}}
69-
>
70-
{t('rag:deleteFile')}
71-
</Button>
68+
navigate(`/rag/${ragFile.ragIndex.id}`)
69+
}}
70+
sx={{ my: 2 }}
71+
>
72+
{t('rag:deleteFileText')}
73+
</OutlineButtonBlack>
74+
)}
75+
<Button
76+
variant="text"
77+
color="error"
78+
onClick={async () => {
79+
if (window.confirm('Are you sure you want to delete this file?')) {
80+
await deleteMutation.mutateAsync({
81+
indexId: ragFile.ragIndex.id,
82+
fileId: ragFile.id,
83+
})
84+
enqueueSnackbar(t('rag:fileDeleted'), { variant: 'success' })
85+
86+
navigate(`/rag/${ragFile.ragIndex.id}`)
87+
}
88+
}}
89+
>
90+
{t('rag:deleteFile')}
91+
</Button>
92+
</Box>
7293
<RagFileInfo file={ragFile} />
7394
<Typography variant="h4">{t('rag:content')}</Typography>
7495
{ragFile.fileContent.length === 0 ? (

src/client/components/Rag/RagIndex.tsx

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import React from 'react'
22
import { Button, Box, Typography, styled, LinearProgress, Container, DialogTitle, DialogContent, Dialog, Link, CircularProgress } from '@mui/material'
33
import { useNavigate, useParams, Link as RouterLink } from 'react-router-dom'
4-
import { ArrowBackOutlined, CloudUpload, DeleteOutline, FindInPage } from '@mui/icons-material'
4+
import { ArrowBackOutlined, Autorenew, CloudUpload, DeleteOutline, FindInPage } from '@mui/icons-material'
55
import { orderBy } from 'lodash'
66
import { RagFileInfo } from './RagFileDetails'
77
import { useDeleteRagIndexMutation, useRagIndexDetails, useUploadMutation } from './api'
@@ -126,6 +126,7 @@ export const RagIndex: React.FC = () => {
126126
{t('rag:processingFailures')}
127127
</Typography>
128128
<OutlineButtonBlack
129+
startIcon={<Autorenew />}
129130
onClick={async () => {
130131
await handleUpload([])
131132
}}
@@ -136,6 +137,7 @@ export const RagIndex: React.FC = () => {
136137
)}
137138
{user?.isAdmin && (
138139
<OutlineButtonBlack
140+
startIcon={<Autorenew />}
139141
onClick={async () => {
140142
await handleUpload([])
141143
}}

src/client/components/Rag/api.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,13 @@ export const useDeleteRagFileMutation = () => {
7878
})
7979
return mutation
8080
}
81+
82+
export const useDeleteRagFileTextMutation = () => {
83+
const mutation = useMutation({
84+
mutationFn: async ({ indexId, fileId }: { indexId: number; fileId: number }) => {
85+
const response = await apiClient.delete(`/rag/indices/${indexId}/files/${fileId}/text`)
86+
return response.data
87+
},
88+
})
89+
return mutation
90+
}

src/client/locales/en.json

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,9 @@
392392
"fileSize": "File size (characters)",
393393
"fileStatus": "Status",
394394
"collectionDeleted": "Collection deleted",
395-
"fileDeleted": "File deleted"
395+
"fileDeleted": "File deleted",
396+
"fileTextDeleted": "Rescanning started",
397+
"deleteFileText": "Rescan PDF",
398+
"onlyPdf": "a"
396399
}
397-
}
400+
}

src/client/locales/fi.json

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,9 @@
394394
"fileSize": "Tiedoston koko (merkkiä)",
395395
"fileStatus": "Tila",
396396
"collectionDeleted": "Kokoelma poistettu",
397-
"fileDeleted": "Tiedosto poistettu"
397+
"fileDeleted": "Tiedosto poistettu",
398+
"fileTextDeleted": "Uudelleenskannaus aloitettu",
399+
"deleteFileText": "Skannaa PDF uudestaan",
400+
"onlyPdf": "a"
398401
}
399-
}
402+
}

src/server/routes/rag/ragIndex.ts

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ ragIndexRouter.delete('/files/:fileId', async (req, res) => {
142142
throw ApplicationError.NotFound('File not found')
143143
}
144144

145-
// Delete file from disk
145+
// Delete the file from S3
146146
await FileStore.deleteRagFileDocument(ragFile)
147147

148148
// Delete RagFile record
@@ -153,7 +153,31 @@ ragIndexRouter.delete('/files/:fileId', async (req, res) => {
153153
console.error('Error ingesting RAG files:', error)
154154
})
155155

156-
res.json({ message: 'File deleted successfully' })
156+
res.json({ message: 'File deleted successfully, re-ingesting' })
157+
})
158+
159+
ragIndexRouter.delete('/files/:fileId/text', async (req, res) => {
160+
const ragIndexRequest = req as unknown as RagIndexRequest
161+
const ragIndex = ragIndexRequest.ragIndex
162+
const fileId = RagFileIdSchema.parse(req.params.fileId)
163+
164+
const ragFile = await RagFile.findOne({
165+
where: { id: fileId, ragIndexId: ragIndex.id },
166+
})
167+
168+
if (!ragFile) {
169+
throw ApplicationError.NotFound('File not found')
170+
}
171+
172+
// Delete the text version of the file from S3, if it exists
173+
await FileStore.deleteRagFileText(ragFile)
174+
175+
// Re-ingest the index now that the text version has been deleted
176+
ingestRagFiles(ragIndex).catch((error) => {
177+
console.error('Error ingesting RAG files:', error)
178+
})
179+
180+
res.json({ message: 'File text version deleted successfully, re-ingesting' })
157181
})
158182

159183
const upload = multer({

src/server/services/rag/fileStore.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,18 +53,27 @@ export const FileStore = {
5353
}
5454
},
5555

56-
async deleteRagFileDocument(ragFile: RagFile) {
56+
async deleteRagFileText(ragFile: RagFile) {
5757
const s3Key = FileStore.getRagFileKey(ragFile)
5858

5959
try {
6060
if (isPdf(s3Key)) {
6161
const pdfTextKey = getPdfTextKey(s3Key)
6262
await s3Client.send(new DeleteObjectCommand({ Bucket: S3_BUCKET, Key: pdfTextKey }))
63+
return true
6364
}
6465
} catch (error) {
6566
console.error(`Failed to delete file ${getPdfTextKey(s3Key)} from S3:`, error)
6667
}
6768

69+
return false
70+
},
71+
72+
async deleteRagFileDocument(ragFile: RagFile) {
73+
await FileStore.deleteRagFileText(ragFile)
74+
75+
const s3Key = FileStore.getRagFileKey(ragFile)
76+
6877
try {
6978
await s3Client.send(new DeleteObjectCommand({ Bucket: S3_BUCKET, Key: s3Key }))
7079
} catch (error) {

0 commit comments

Comments
 (0)