Skip to content

Commit a7d86c4

Browse files
committed
index language support
1 parent 127a6ec commit a7d86c4

File tree

8 files changed

+45
-10
lines changed

8 files changed

+45
-10
lines changed

src/client/components/Rag/Rag.tsx

Lines changed: 27 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,22 @@
11
import React, { useState } from 'react'
2-
import { TextField, Button, Box, Typography, Table, TableHead, TableBody, TableRow, TableCell, Paper, Link, Container } from '@mui/material'
2+
import {
3+
TextField,
4+
Button,
5+
Box,
6+
Typography,
7+
Table,
8+
TableHead,
9+
TableBody,
10+
TableRow,
11+
TableCell,
12+
Paper,
13+
Link,
14+
Container,
15+
FormControl,
16+
Select,
17+
MenuItem,
18+
InputLabel,
19+
} from '@mui/material'
320
import { useNavigate, Link as RouterLink, useParams } from 'react-router-dom'
421
import { useCourseRagIndices, useRagIndices } from '../../hooks/useRagIndices'
522
import { useCreateRagIndexMutation } from './api'
@@ -12,6 +29,7 @@ const Rag: React.FC = () => {
1229
const { ragIndices } = useCourseRagIndices(chatInstance?.id, true)
1330
const createIndexMutation = useCreateRagIndexMutation()
1431
const [indexName, setIndexName] = useState('')
32+
const [language, setLanguage] = useState<'Finnish' | 'English'>('English')
1533

1634
return (
1735
<Container sx={{ display: 'flex', gap: 2, mt: '4rem', mb: '10rem' }} maxWidth="xl">
@@ -29,13 +47,21 @@ const Rag: React.FC = () => {
2947
onChange={(e) => setIndexName(e.target.value)}
3048
fullWidth
3149
/>
50+
<FormControl fullWidth>
51+
<InputLabel id="language-label">Language</InputLabel>
52+
<Select labelId="language-label" id="language-select" value={language} onChange={(e) => setLanguage(e.target.value as 'Finnish' | 'English')}>
53+
<MenuItem value={'Finnish'}>Finnish</MenuItem>
54+
<MenuItem value={'English'}>English</MenuItem>
55+
</Select>
56+
</FormControl>
3257
<Button
3358
variant="contained"
3459
color="primary"
3560
onClick={async () => {
3661
const newIndex = await createIndexMutation.mutateAsync({
3762
chatInstanceId: chatInstance?.id,
3863
indexName,
64+
language,
3965
})
4066
setIndexName('')
4167
navigate(`/rag/${newIndex.id}`)

src/client/components/Rag/api.ts

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,10 +4,12 @@ import { RagFileAttributes, RagIndexAttributes } from '../../../shared/types'
44

55
export const useCreateRagIndexMutation = () => {
66
const mutation = useMutation({
7-
mutationFn: async ({ chatInstanceId, indexName }: { chatInstanceId: string; indexName: string }) => {
7+
mutationFn: async ({ chatInstanceId, indexName, language }: { chatInstanceId: string; indexName: string; language: string }) => {
8+
console.log(language)
89
const response = await apiClient.post('/rag/indices', {
910
name: indexName,
1011
chatInstanceId,
12+
language,
1113
})
1214
return response.data
1315
},

src/server/routes/rag/rag.ts

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ const router = Router()
1313

1414
const IndexCreationSchema = z.object({
1515
name: z.string().min(1).max(100),
16+
language: z.enum(['Finnish', 'English']).optional(),
1617
chatInstanceId: z.string().min(1).max(100),
1718
dim: z.number().min(EMBED_DIM).max(EMBED_DIM).default(EMBED_DIM),
1819
})
@@ -24,7 +25,7 @@ const hasChatInstanceRagPermission = (user: User, chatInstance: ChatInstance) =>
2425

2526
router.post('/indices', async (req, res) => {
2627
const { user } = req as RequestWithUser
27-
const { name, dim, chatInstanceId } = IndexCreationSchema.parse(req.body)
28+
const { name, dim, chatInstanceId, language } = IndexCreationSchema.parse(req.body)
2829

2930
const chatInstance = await ChatInstance.findByPk(chatInstanceId, {
3031
include: [
@@ -70,6 +71,7 @@ router.post('/indices', async (req, res) => {
7071
dim,
7172
// azureVectorStoreId: vectorStore.id,
7273
ragIndexFilterValue: randomUUID(),
74+
language,
7375
},
7476
})
7577

src/server/routes/rag/ragIndex.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -209,7 +209,7 @@ ragIndexRouter.post('/upload', [indexUploadDirMiddleware, uploadMiddleware], asy
209209

210210
await Promise.all(
211211
ragFiles.map(async (rf) => {
212-
await ingestRagFile(rf)
212+
await ingestRagFile(rf, ragIndex.metadata.language)
213213
rf.pipelineStage = 'completed'
214214
await rf.save()
215215
}),

src/server/services/rag/ingestion.ts

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ const markdownTextSplitter = new MarkdownTextSplitter({
1616

1717
const isMarkdown = (mimetype: string) => mimetype === 'text/markdown'
1818

19-
export const ingestRagFile = async (ragFile: RagFile) => {
19+
export const ingestRagFile = async (ragFile: RagFile, language: 'Finnish' | 'English' = 'English') => {
2020
console.time(`Ingestion ${ragFile.filename}`)
2121

2222
const text = await FileStore.readRagFileTextContent(ragFile)
@@ -33,7 +33,8 @@ export const ingestRagFile = async (ragFile: RagFile) => {
3333
chunkDocument.id = `ragIndex-${ragFile.ragIndexId}-${ragFile.filename}-${idx}`
3434
})
3535

36-
const vectorStore = getRedisVectorStore(ragFile.ragIndexId)
36+
console.log(language)
37+
const vectorStore = getRedisVectorStore(ragFile.ragIndexId, language)
3738

3839
await vectorStore.addDocuments(chunkDocuments)
3940
console.timeEnd(`Ingestion ${ragFile.filename}`)

src/server/services/rag/search.ts

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,12 +9,12 @@ export const search = async (query: string, ragIndex: RagIndex): Promise<RagChun
99
console.log('Searching', ragIndex.metadata.name, 'for query:', query)
1010
const vectorStore = getRedisVectorStore(ragIndex.id)
1111

12-
const vectorstoreRetriever = vectorStore.asRetriever(8)
12+
// const vectorstoreRetriever = vectorStore.asRetriever(0)
1313
const ftSearchRetriever = new FTSearchRetriever(vectorStore.indexName)
1414

1515
const retriever = new EnsembleRetriever({
16-
retrievers: [vectorstoreRetriever, ftSearchRetriever],
17-
weights: [0.3, 0.7],
16+
retrievers: [ftSearchRetriever],
17+
weights: [1],
1818
})
1919

2020
const results0 = await retriever.invoke(query)

src/server/services/rag/vectorStore.ts

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,11 +28,14 @@ export const getChromaVectorStore = (ragIndexId: number) => {
2828
})
2929
}
3030

31-
export const getRedisVectorStore = (ragIndexId: number) => {
31+
export const getRedisVectorStore = (ragIndexId: number, language?: string) => {
3232
return new RedisVectorStore(getEmbedder(), {
3333
// @ts-expect-error something wrong with typing, but it should actually match the signature.
3434
redisClient,
3535
indexName: `ragIndex-${String(ragIndexId)}`,
36+
createIndexOptions: {
37+
LANGUAGE: language as 'Finnish' | 'English' | undefined,
38+
},
3639
})
3740
}
3841

src/shared/types.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ export type RagIndexMetadata = {
99
azureVectorStoreId?: string
1010
ragIndexFilterValue: string
1111
instructions?: string
12+
language?: 'Finnish' | 'English'
1213
}
1314

1415
export type RagFileMetadata = {

0 commit comments

Comments
 (0)