Skip to content

Commit 127bc2a

Browse files
authored
Merge pull request #15 from tinybirdco/vector-search
Vector search
2 parents e595e92 + 32d7e9b commit 127bc2a

File tree

13 files changed

+6843
-4960
lines changed

13 files changed

+6843
-4960
lines changed

dashboard/ai-analytics/package-lock.json

Lines changed: 708 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dashboard/ai-analytics/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
"@remixicon/react": "^4.6.0",
2020
"@tanstack/react-query": "^5.67.2",
2121
"@tremor/react": "^3.18.7",
22+
"@xenova/transformers": "^2.17.2",
2223
"ai": "^4.1.61",
2324
"class-variance-authority": "^0.7.1",
2425
"clsx": "^2.1.1",
@@ -30,6 +31,7 @@
3031
"react": "^18.2.0",
3132
"react-day-picker": "^8.10.1",
3233
"react-dom": "^18.2.0",
34+
"server-only": "^0.0.1",
3335
"tailwind-merge": "^3.0.2",
3436
"tailwindcss-animate": "^1.0.7",
3537
"zod": "^3.24.2",
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// dashboard/ai-analytics/src/app/api/generate-embedding/route.ts
2+
import { NextResponse } from 'next/server';
3+
import { pipeline } from '@xenova/transformers';
4+
import type { FeatureExtractionPipeline } from '@xenova/transformers/types/pipelines';
5+
6+
// Cache the model to avoid reloading it for every request
7+
let embeddingPipeline: FeatureExtractionPipeline | null = null;
8+
9+
async function getEmbeddingModel() {
10+
if (!embeddingPipeline) {
11+
embeddingPipeline = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2') as FeatureExtractionPipeline;
12+
}
13+
return embeddingPipeline;
14+
}
15+
16+
export async function POST(req: Request) {
17+
try {
18+
const { text } = await req.json();
19+
20+
if (!text || typeof text !== 'string') {
21+
return NextResponse.json(
22+
{ error: 'Invalid input: text must be a non-empty string' },
23+
{ status: 400 }
24+
);
25+
}
26+
27+
// Get the model
28+
const model = await getEmbeddingModel();
29+
30+
// Generate embedding
31+
const output = await model(text, { pooling: 'mean', normalize: true });
32+
const embedding = Array.from(output.data);
33+
34+
return NextResponse.json({ embedding });
35+
} catch (error) {
36+
console.error('Error generating embedding:', error);
37+
return NextResponse.json(
38+
{ error: 'Failed to generate embedding' },
39+
{ status: 500 }
40+
);
41+
}
42+
}

dashboard/ai-analytics/src/app/components/DataTable.tsx

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,13 @@ interface LLMMessage {
2727
cost: number;
2828
response_status: string;
2929
exception: string | null;
30+
similarity?: number;
3031
}
3132

3233
interface DataTableProps {
3334
data?: { data: LLMMessage[] };
3435
isLoading?: boolean;
36+
searchHighlight?: string | null;
3537
}
3638

3739
// Mock data for development and testing
@@ -58,7 +60,11 @@ const MOCK_DATA = {
5860
]
5961
};
6062

61-
export default function DataTable({ data = MOCK_DATA, isLoading = false }: DataTableProps) {
63+
export default function DataTable({
64+
data = MOCK_DATA,
65+
isLoading = false,
66+
searchHighlight = null
67+
}: DataTableProps) {
6268
if (isLoading) {
6369
return (
6470
<div className="flex items-center justify-center h-full">
@@ -96,9 +102,10 @@ export default function DataTable({ data = MOCK_DATA, isLoading = false }: DataT
96102
<TableHeaderCell>Prompt Tokens</TableHeaderCell>
97103
<TableHeaderCell>Completion Tokens</TableHeaderCell>
98104
<TableHeaderCell>Total Tokens</TableHeaderCell>
99-
<TableHeaderCell>Duration (ms)</TableHeaderCell>
100-
<TableHeaderCell>Cost ($)</TableHeaderCell>
105+
<TableHeaderCell>Duration (s)</TableHeaderCell>
106+
<TableHeaderCell>Cost</TableHeaderCell>
101107
<TableHeaderCell>Status</TableHeaderCell>
108+
{searchHighlight && <TableHeaderCell>Relevance</TableHeaderCell>}
102109
</TableRow>
103110
</TableHead>
104111
<TableBody>
@@ -125,11 +132,20 @@ export default function DataTable({ data = MOCK_DATA, isLoading = false }: DataT
125132
{item.response_status}
126133
</span>
127134
</TableCell>
135+
{searchHighlight && (
136+
<TableCell>
137+
{item.similarity ? (
138+
<span className="px-2 py-1 rounded-full text-xs bg-blue-100 text-blue-800">
139+
{(item.similarity * 100).toFixed(0)}%
140+
</span>
141+
) : '-'}
142+
</TableCell>
143+
)}
128144
</TableRow>
129145
))
130146
) : (
131147
<TableRow>
132-
<TableCell colSpan={12} className="text-center">
148+
<TableCell colSpan={searchHighlight ? 13 : 12} className="text-center">
133149
No messages found.
134150
</TableCell>
135151
</TableRow>
Lines changed: 98 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,112 @@
1+
// dashboard/ai-analytics/src/app/containers/DataTableContainer.tsx
12
'use client';
23

4+
import { useState, useEffect } from 'react';
35
import DataTable from '../components/DataTable';
46
import { useLLMMessages } from '@/hooks/useTinybirdData';
7+
import { Search } from 'lucide-react';
58

69
interface DataTableContainerProps {
710
filters: Record<string, string>;
811
isLoading?: boolean;
912
}
1013

11-
export default function DataTableContainer({ filters, isLoading: parentLoading }: DataTableContainerProps) {
12-
// Use the LLM messages hook
13-
const { data: messagesData, isLoading: messagesLoading } = useLLMMessages(filters);
14+
export default function DataTableContainer({ filters, isLoading = false }: DataTableContainerProps) {
15+
const [searchText, setSearchText] = useState<string | null>(null);
16+
const [searchInput, setSearchInput] = useState('');
17+
const [embedding, setEmbedding] = useState<number[] | null>(null);
18+
const [isGeneratingEmbedding, setIsGeneratingEmbedding] = useState(false);
19+
20+
// Generate embedding when search text changes
21+
useEffect(() => {
22+
async function generateEmbedding() {
23+
if (!searchText) {
24+
setEmbedding(null);
25+
return;
26+
}
27+
28+
setIsGeneratingEmbedding(true);
29+
try {
30+
const response = await fetch('/api/generate-embedding', {
31+
method: 'POST',
32+
headers: {
33+
'Content-Type': 'application/json',
34+
},
35+
body: JSON.stringify({ text: searchText }),
36+
});
37+
38+
if (!response.ok) {
39+
throw new Error('Failed to generate embedding');
40+
}
41+
42+
const data = await response.json();
43+
setEmbedding(data.embedding);
44+
} catch (error) {
45+
console.error('Error generating embedding:', error);
46+
setEmbedding(null);
47+
} finally {
48+
setIsGeneratingEmbedding(false);
49+
}
50+
}
51+
52+
generateEmbedding();
53+
}, [searchText]);
54+
55+
// Use the regular messages hook with embedding when available
56+
const messagesQuery = useLLMMessages({
57+
...filters,
58+
...(embedding ? {
59+
embedding: JSON.stringify(embedding),
60+
similarity_threshold: 0.7
61+
} : {})
62+
});
63+
64+
const handleSearch = (e: React.FormEvent) => {
65+
e.preventDefault();
66+
setSearchText(searchInput.trim() || null);
67+
};
1468

15-
// Combine loading states
16-
const isLoading = parentLoading || messagesLoading;
17-
1869
return (
19-
<div className="h-full p-4">
20-
<DataTable
21-
data={messagesData}
22-
isLoading={isLoading}
23-
/>
70+
<div className="flex flex-col h-full">
71+
<div className="p-4 border-b border-gray-700">
72+
<form onSubmit={handleSearch} className="flex gap-2">
73+
<div className="relative flex-grow">
74+
<input
75+
type="text"
76+
placeholder="Search conversations semantically..."
77+
className="w-full px-4 py-2 bg-gray-800 border border-gray-700 rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500"
78+
value={searchInput}
79+
onChange={(e) => setSearchInput(e.target.value)}
80+
/>
81+
<button
82+
type="submit"
83+
className="absolute inset-y-0 right-0 flex items-center px-3 text-gray-400 hover:text-white"
84+
>
85+
<Search className="w-5 h-5" />
86+
</button>
87+
</div>
88+
{searchText && (
89+
<button
90+
type="button"
91+
className="px-3 py-2 text-sm text-gray-300 bg-gray-700 rounded-lg hover:bg-gray-600"
92+
onClick={() => {
93+
setSearchText(null);
94+
setSearchInput('');
95+
}}
96+
>
97+
Clear
98+
</button>
99+
)}
100+
</form>
101+
</div>
102+
103+
<div className="flex-grow overflow-hidden">
104+
<DataTable
105+
data={messagesQuery.data}
106+
isLoading={isLoading || messagesQuery.isLoading || isGeneratingEmbedding}
107+
searchHighlight={searchText}
108+
/>
109+
</div>
24110
</div>
25111
);
26-
}
112+
}

dashboard/ai-analytics/src/hooks/useTinybirdData.ts

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
import { useQuery } from '@tanstack/react-query';
import { fetchLLMUsage, fetchGenericCounter, fetchLLMMessages, searchLLMMessagesByVector } from '@/services/tinybird';
import { useTinybirdToken } from '@/providers/TinybirdProvider';
import { useState, useEffect } from 'react';
45

56
export function useLLMUsage(filters: Record<string, string>) {
67
const { token } = useTinybirdToken();
@@ -25,12 +26,69 @@ export function useGenericCounter(dimension: string, filters: Record<string, str
2526
});
2627
}
2728

28-
export function useLLMMessages(filters: Record<string, string>) {
29+
export function useLLMMessages(filters: Record<string, string | number[] | string | number | undefined>) {
2930
const { token } = useTinybirdToken();
3031

3132
return useQuery({
3233
queryKey: ['llm-messages', filters],
3334
queryFn: () => fetchLLMMessages(token!, filters),
3435
enabled: !!token
3536
});
36-
}
37+
}
38+
39+
import { searchLLMMessagesByVector } from '@/services/tinybird';
40+
41+
export function useLLMVectorSearch(
42+
searchText: string | null,
43+
filters: Record<string, string>
44+
) {
45+
const { token } = useTinybirdToken();
46+
const [embedding, setEmbedding] = useState<number[] | null>(null);
47+
// const [isGeneratingEmbedding, setIsGeneratingEmbedding] = useState(false);
48+
49+
// Generate embedding when search text changes
50+
useEffect(() => {
51+
async function generateEmbedding() {
52+
if (!searchText) {
53+
setEmbedding(null);
54+
return;
55+
}
56+
57+
// setIsGeneratingEmbedding(true);
58+
try {
59+
const response = await fetch('/api/generate-embedding', {
60+
method: 'POST',
61+
headers: {
62+
'Content-Type': 'application/json',
63+
},
64+
body: JSON.stringify({ text: searchText }),
65+
});
66+
67+
if (!response.ok) {
68+
throw new Error('Failed to generate embedding');
69+
}
70+
71+
const data = await response.json();
72+
setEmbedding(data.embedding);
73+
} catch (error) {
74+
console.error('Error generating embedding:', error);
75+
setEmbedding(null);
76+
}
77+
// finally {
78+
// setIsGeneratingEmbedding(false);
79+
// }
80+
}
81+
82+
generateEmbedding();
83+
}, [searchText]);
84+
85+
return useQuery({
86+
queryKey: ['llm-vector-search', searchText, embedding, filters],
87+
queryFn: () => searchLLMMessagesByVector(token!, {
88+
...filters,
89+
embedding: embedding || undefined,
90+
similarity_threshold: 0.7, // Adjust as needed
91+
}),
92+
enabled: !!token && !!embedding,
93+
});
94+
}

0 commit comments

Comments
 (0)