Skip to content

Commit cebc3b8

Browse files
committed
upload UX
1 parent 208b24c commit cebc3b8

File tree

12 files changed

+273
-54
lines changed

12 files changed

+273
-54
lines changed

src/client/Router.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ import Discussion from './components/Courses/Course/Discussion'
1111
import NoAccess from './components/NoAccess'
1212
import Chats from './components/Chats'
1313
import Statistics from './components/Statistics'
14-
import Rag from './components/Rag'
14+
import Rag from './components/Rag/Rag'
1515
import { ChatV2 } from './components/ChatV2/ChatV2'
1616

1717
const router = createBrowserRouter(
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
import { Box, Typography } from '@mui/material'
2+
import { useEffect, useReducer } from 'react'
3+
4+
/** One progress message parsed from a line of the upload response stream. */
type ProgressEvent = {
  /** Name of the pipeline stage the message refers to. */
  stage: string
  /** Key the progress is grouped under (rendered as the file heading). */
  item?: string
  /** When true, the whole stage is finished for every tracked item. */
  done?: boolean
  /** Carried on the event type but not interpreted by the reducer. */
  error?: string
}

/** Progress counters keyed first by file name, then by stage name. */
type ProgressState = {
  [fileName: string]: {
    [stage: string]: {
      count: number
      done: boolean
    }
  }
}

type Action = { type: 'UPDATE'; payload: ProgressEvent } | { type: 'RESET' }

/**
 * Pure reducer that folds progress events into per-file, per-stage counters.
 *
 * - `UPDATE` with `done: true` marks `stage` as done for every file that already tracks it.
 * - `UPDATE` with an `item` increments that item's counter for `stage`.
 * - `UPDATE` with neither is a no-op and returns the same state reference.
 * - `RESET` clears all progress.
 *
 * The previous state is never mutated: every changed file/stage entry is copied, and the
 * original reference is returned when nothing changed so React can skip the re-render.
 */
const progressReducer = (state: ProgressState, action: Action): ProgressState => {
  switch (action.type) {
    case 'UPDATE': {
      const { stage, item, done } = action.payload

      if (done) {
        // Mark this stage as done for all files, copying only the entries that change.
        let changed = false
        const nextState: ProgressState = {}
        for (const [file, stages] of Object.entries(state)) {
          const current = stages[stage]
          if (current && !current.done) {
            nextState[file] = { ...stages, [stage]: { ...current, done: true } }
            changed = true
          } else {
            nextState[file] = stages
          }
        }
        return changed ? nextState : state
      }

      if (!item) return state

      const fileStages = state[item] ?? {}
      const stageData = fileStages[stage] ?? { count: 0, done: false }

      return {
        ...state,
        [item]: {
          ...fileStages,
          [stage]: {
            count: stageData.count + 1,
            done: stageData.done,
          },
        },
      }
    }

    case 'RESET':
      return {}

    default:
      return state
  }
}
60+
61+
/**
 * Renders per-file, per-stage progress read from a newline-delimited JSON byte stream.
 *
 * Each non-empty line of the stream is parsed as JSON and dispatched to `progressReducer`
 * as an `UPDATE`. Lines that are not valid JSON, or that lack a string `stage`, are logged
 * and skipped.
 *
 * @param stream - Stream to read progress events from; when `null` nothing is read.
 */
export const ProgressReporter: React.FC<{ stream: ReadableStream | null }> = ({ stream }) => {
  const [progress, dispatch] = useReducer(progressReducer, {})

  useEffect(() => {
    if (!stream) return

    let reader: ReadableStreamDefaultReader<Uint8Array>
    try {
      // getReader() throws when the stream is already locked to another reader.
      reader = stream.getReader()
    } catch (error) {
      console.error('Error getting reader from stream:', error)
      return
    }

    const decoder = new TextDecoder('utf-8')
    // Set by the cleanup so the read loop can tell an expected interruption from a real failure.
    let cancelled = false
    // Text received so far that has not yet been terminated by '\n'.
    let pending = ''

    const handleLine = (line: string) => {
      if (!line.trim()) return
      console.log('Received chunk:', line)
      try {
        const parsed: unknown = JSON.parse(line)
        if (typeof parsed !== 'object' || parsed === null || typeof (parsed as { stage?: unknown }).stage !== 'string') {
          throw new Error('Progress event is missing a string "stage"')
        }
        dispatch({ type: 'UPDATE', payload: parsed as ProgressEvent })
      } catch (err) {
        console.error('Invalid chunk:', line, err)
      }
    }

    const readStream = async () => {
      try {
        while (true) {
          const { value, done } = await reader.read()
          if (done) break

          // A network chunk may end in the middle of a line: keep the unterminated tail
          // in `pending` and only process the complete lines.
          pending += decoder.decode(value, { stream: true })
          const lines = pending.split('\n')
          pending = lines.pop() ?? ''

          for (const line of lines) {
            handleLine(line)
          }
        }

        // Flush bytes buffered inside the decoder and a final line without a trailing newline.
        pending += decoder.decode()
        handleLine(pending)
      } catch (error) {
        // Releasing the lock in the cleanup rejects the in-flight read(); that is expected.
        if (!cancelled) {
          console.error('Error reading progress stream:', error)
        }
      }
    }

    void readStream()

    return () => {
      cancelled = true
      try {
        reader.releaseLock()
      } catch (error) {
        // Some engines throw instead of rejecting the pending read when a read is in flight.
        console.error('Error releasing stream reader:', error)
      }
    }
  }, [stream])

  return (
    <Box>
      {Object.entries(progress).map(([file, stages]) => (
        <div key={file} className="border p-2 rounded shadow">
          <h3 className="font-bold text-lg">{file}</h3>
          <ul className="ml-4 list-disc">
            {Object.entries(stages).map(([stage, { count, done }]) => (
              <li key={stage}>
                <span className="font-medium">{stage}:</span> {done ? "✅ Done" : `🔄 Processing (${count}x)`}
              </li>
            ))}
          </ul>
        </div>
      ))}
    </Box>
  )
}

src/client/components/Rag.tsx renamed to src/client/components/Rag/Rag.tsx

Lines changed: 43 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
import React, { useState } from 'react'
22
import { TextField, Button, Box, Typography, Table, TableHead, TableBody, TableRow, TableCell, Paper, IconButton, Dialog, DialogTitle, styled } from '@mui/material'
3-
import apiClient from '../util/apiClient'
3+
import apiClient, { postAbortableStream } from '../../util/apiClient'
44
import { useMutation, useQuery } from '@tanstack/react-query'
55
import { CloudUpload, Settings } from '@mui/icons-material'
6-
import Markdown from './Banner/Markdown'
6+
import Markdown from '../Banner/Markdown'
77
import { useSnackbar } from 'notistack'
8+
import { ProgressReporter } from './ProgressReporter'
89

910
type RagResponse = {
1011
id: string
@@ -69,13 +70,12 @@ const useUploadMutation = (index: RagIndexAttributes | null) => {
6970
formData.append('files', file)
7071
})
7172

72-
const response = await apiClient.put(`/rag/indices/${index.id}/upload`, formData, {
73-
headers: {
74-
'Content-Type': 'multipart/form-data',
75-
},
76-
})
73+
const { stream } = await postAbortableStream(`/rag/indices/${index.id}/upload`, formData)
74+
if (!stream) {
75+
throw new Error('No stream returned from server')
76+
}
7777

78-
return response.data
78+
return stream
7979
},
8080
})
8181
return mutation
@@ -105,6 +105,7 @@ const Rag: React.FC = () => {
105105
const [response, setResponse] = useState<RagResponse[] | null>(null)
106106
const uploadMutation = useUploadMutation(selectedIndex)
107107
const [modalOpen, setModalOpen] = useState(false)
108+
const [stream, setStream] = useState<ReadableStream | null>(null)
108109

109110
const handleSubmit = async (event: React.FormEvent) => {
110111
event.preventDefault()
@@ -131,39 +132,40 @@ const Rag: React.FC = () => {
131132
<Box sx={{ display: 'flex', gap: 2 }}>
132133
<Dialog open={!!selectedIndex && modalOpen} onClose={() => setModalOpen(false)}>
133134
<DialogTitle>Edit {selectedIndex?.metadata?.name}</DialogTitle>
134-
<Box sx={{ padding: 2, display: 'flex', gap: 2 }}>
135-
<Button component="label" role={undefined} variant="contained" tabIndex={-1} startIcon={<CloudUpload />} disabled={uploadMutation.isPending}>
136-
{uploadMutation.isPending ? 'Uploading...' : 'Upload Files'}
137-
<VisuallyHiddenInput
138-
type="file"
139-
onChange={async (event) => {
140-
const files = event.target.files
141-
console.log('Files selected:', files)
142-
if (files && files.length > 0) {
143-
await uploadMutation.mutateAsync(files)
135+
<Box sx={{ padding: 2 }}>
136+
<Box sx={{ display: 'flex', gap: 2 }}>
137+
<Button component="label" role={undefined} variant="contained" tabIndex={-1} startIcon={<CloudUpload />} disabled={uploadMutation.isPending}>
138+
{uploadMutation.isPending ? 'Uploading...' : 'Upload Files'}
139+
<VisuallyHiddenInput
140+
type="file"
141+
onChange={async (event) => {
142+
const files = event.target.files
143+
console.log('Files selected:', files)
144+
if (files && files.length > 0) {
145+
const stream = await uploadMutation.mutateAsync(files)
146+
setStream(stream)
147+
}
148+
}}
149+
multiple
150+
/>
151+
</Button>
152+
<Button
153+
variant="text"
154+
color="error"
155+
onClick={async () => {
156+
if (selectedIndex && window.confirm(`Are you sure you want to delete index ${selectedIndex.metadata.name}?`)) {
157+
await deleteIndexMutation.mutateAsync(selectedIndex.id)
158+
setSelectedIndex(null)
144159
refetch()
145-
setModalOpen(false)
146-
enqueueSnackbar('Files uploaded successfully', {
147-
variant: 'success',
148-
})
149160
}
150161
}}
151-
multiple
152-
/>
153-
</Button>
154-
<Button
155-
variant="text"
156-
color="error"
157-
onClick={async () => {
158-
if (selectedIndex && window.confirm(`Are you sure you want to delete index ${selectedIndex.metadata.name}?`)) {
159-
await deleteIndexMutation.mutateAsync(selectedIndex.id)
160-
setSelectedIndex(null)
161-
refetch()
162-
}
163-
}}
164-
>
165-
Delete Index
166-
</Button>
162+
>
163+
Delete Index
164+
</Button>
165+
</Box>
166+
<Box mt={2}>
167+
<ProgressReporter stream={stream} />
168+
</Box>
167169
</Box>
168170
</Dialog>
169171
<Box>
@@ -247,7 +249,9 @@ const Rag: React.FC = () => {
247249
{response.map((doc) => (
248250
<Paper key={doc.id} sx={{ marginBottom: 2, p: 1 }} elevation={2}>
249251
<Typography variant="caption">Score: {doc.value.score}</Typography>
250-
<Typography variant="subtitle1" fontFamily="monospace" mb={2}>{JSON.stringify(doc.value.metadata, null, 2)}</Typography>
252+
<Typography variant="subtitle1" fontFamily="monospace" mb={2}>
253+
{JSON.stringify(doc.value.metadata, null, 2)}
254+
</Typography>
251255
{doc.value.metadata.type === 'md' ? (
252256
<Markdown>{doc.value.content}</Markdown>
253257
) : (

src/server/routes/rag.ts

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import { ingestionPipeline } from '../services/rag/ingestion/pipeline'
99
import { getAzureOpenAIClient } from '../util/azure'
1010
import multer from 'multer'
1111
import { mkdir, rm, stat } from 'fs/promises'
12+
import { Readable } from 'stream'
1213

1314
const router = Router()
1415

@@ -115,7 +116,7 @@ const indexUploadDirMiddleware = async (req: Request, _res: Response, next: Next
115116
next()
116117
}
117118

118-
router.put('/indices/:id/upload', [indexUploadDirMiddleware, uploadMiddleware], async (req, res) => {
119+
router.post('/indices/:id/upload', [indexUploadDirMiddleware, uploadMiddleware], async (req, res) => {
119120
const { user } = req as unknown as RequestWithUser
120121
const id = IndexIdSchema.parse(req.params.id)
121122

@@ -132,11 +133,14 @@ router.put('/indices/:id/upload', [indexUploadDirMiddleware, uploadMiddleware],
132133
return
133134
}
134135

136+
res.setHeader('Content-Type', 'application/x-ndjson')
137+
res.setHeader('Transfer-Encoding', 'chunked')
138+
135139
const openAiClient = getAzureOpenAIClient(EMBED_MODEL)
136140

137-
await ingestionPipeline(openAiClient, `uploads/rag/${id}`, ragIndex)
141+
const progressReporter = await ingestionPipeline(openAiClient, `uploads/rag/${id}`, ragIndex)
138142

139-
res.json({ message: 'Files uploaded and processed' })
143+
progressReporter.pipe(res)
140144
})
141145

142146
const RagIndexQuerySchema = z.object({

src/server/services/rag/ingestion/chunker.ts

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@ import { chunkingAlgorithms } from './chunkingAlgorithms.ts'
33
import { mkdirSync } from 'node:fs'
44
import { writeFile } from 'node:fs/promises'
55
import { TextData } from './textExtractor.ts'
6+
import { StageReporter } from './progressReporter.ts'
67

78
export class Chunker extends Transform {
89
private cachePath: string
10+
public progressReporter: StageReporter
911

1012
constructor(cachePath: string) {
1113
super({ objectMode: true })
@@ -16,7 +18,7 @@ export class Chunker extends Transform {
1618
mkdirSync(this.cachePath, { recursive: true })
1719
}
1820

19-
_transform(data: TextData, _encoding: BufferEncoding, callback: (error?: Error | null) => void) {
21+
async _transform(data: TextData, _encoding: BufferEncoding, callback: (error?: Error | null) => void) {
2022
const chunkingAlgorithm = chunkingAlgorithms[data.chunkingStrategy]
2123

2224
const chunks = chunkingAlgorithm(data)
@@ -26,13 +28,20 @@ export class Chunker extends Transform {
2628

2729
// Save chunks to cache
2830

29-
Promise.all(
31+
await Promise.all(
3032
chunks.map((chunk) => {
3133
const chunkPath = `${this.cachePath}/${chunk.id}.json`
3234
return writeFile(chunkPath, JSON.stringify(chunk, null, 2), 'utf-8')
3335
}),
34-
).then(() => {
35-
callback()
36-
})
36+
)
37+
38+
this.progressReporter.reportProgress(data.fileName)
39+
40+
callback()
41+
}
42+
43+
_flush(callback: (error?: Error | null) => void) {
44+
this.progressReporter.reportDone()
45+
callback()
3746
}
3847
}

src/server/services/rag/ingestion/chunkingAlgorithms.ts

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,11 @@ import { TextData } from './textExtractor.ts'
33
export type Chunk = {
44
id: string
55
content: string[]
6-
metadata?: {
7-
[key: string]: any
6+
metadata: {
7+
title?: string
8+
titleHierarchy?: string[]
9+
type: string
10+
filename: string
811
}
912
}
1013

@@ -28,6 +31,7 @@ export const createTitleChunks = (file: TextData): Chunk[] => {
2831
title,
2932
titleHierarchy: [...titleHierarchy],
3033
type: file.type,
34+
filename: file.fileName,
3135
},
3236
})
3337

@@ -62,6 +66,7 @@ export const createTitleChunks = (file: TextData): Chunk[] => {
6266
title,
6367
titleHierarchy: [...titleHierarchy],
6468
type: file.type,
69+
filename: file.fileName,
6570
},
6671
})
6772
}
@@ -82,8 +87,9 @@ export const createSplittedTitleChunks = (file: TextData): Chunk[] => {
8287
content: section.split('\n'),
8388
metadata: {
8489
title: `${title} - ${index + 1}`,
85-
titleHierarchy: [...titleHierarchy, index + 1],
90+
titleHierarchy: [...titleHierarchy, index + 1 + ''],
8691
type: file.type,
92+
filename: file.fileName,
8793
},
8894
}))
8995
})
@@ -103,6 +109,7 @@ export const createStaticChunks = (file: TextData, length: number = 800, overlap
103109
metadata: {
104110
title: file.fileName,
105111
type: file.type,
112+
filename: file.fileName,
106113
},
107114
})
108115
}

0 commit comments

Comments
 (0)