Skip to content

Commit b2d264e

Browse files
authored
Merge pull request #414 from sahilds1/380-track-chatbot-costs
[#380] Track chatbot semantic search costs and performance
2 parents 7179501 + 163e107 commit b2d264e

File tree

4 files changed

+137
-6
lines changed

4 files changed

+137
-6
lines changed

frontend/src/api/apiClient.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,9 @@ const sendAssistantMessage = async (
289289
previousResponseId?: string,
290290
) => {
291291
try {
292-
const response = await publicApi.post(`/v1/api/assistant`, {
292+
// The adminApi interceptor doesn't gracefully omit the JWT token if you're not authenticated
293+
const api = localStorage.getItem("access") ? adminApi : publicApi;
294+
const response = await api.post(`/v1/api/assistant`, {
293295
message,
294296
previous_response_id: previousResponseId,
295297
});
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Generated by Django 4.2.3 on 2025-11-26 21:02
2+
3+
from django.conf import settings
4+
from django.db import migrations, models
5+
import django.db.models.deletion
6+
import uuid
7+
8+
9+
class Migration(migrations.Migration):
    """Create the ``SemanticSearchUsage`` table.

    The table stores one row per semantic search: the query and optional
    document filters, timing for the encode and database steps, the number
    of results, and min/max/median L2 distance of the returned matches.
    """

    dependencies = [
        ("api", "0014_alter_medrule_rule_type"),
    ]

    operations = [
        migrations.CreateModel(
            name="SemanticSearchUsage",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "guid",
                    models.UUIDField(
                        default=uuid.uuid4,
                        editable=False,
                        unique=True,
                    ),
                ),
                (
                    "timestamp",
                    models.DateTimeField(auto_now_add=True),
                ),
                (
                    "query_text",
                    models.TextField(
                        blank=True,
                        help_text="The search query text",
                        null=True,
                    ),
                ),
                (
                    "document_name",
                    models.TextField(
                        blank=True,
                        help_text="Document name filter if used",
                        null=True,
                    ),
                ),
                (
                    "document_guid",
                    models.UUIDField(
                        blank=True,
                        help_text="Document GUID filter if used",
                        null=True,
                    ),
                ),
                (
                    "num_results_requested",
                    models.IntegerField(
                        default=10,
                        help_text="Number of results requested",
                    ),
                ),
                (
                    "encoding_time",
                    models.FloatField(
                        help_text="Time to encode query in seconds",
                    ),
                ),
                (
                    "db_query_time",
                    models.FloatField(
                        help_text="Time for database query in seconds",
                    ),
                ),
                (
                    "num_results_returned",
                    models.IntegerField(
                        help_text="Number of results returned",
                    ),
                ),
                (
                    "min_distance",
                    models.FloatField(
                        blank=True,
                        help_text="Minimum L2 distance (null if no results)",
                        null=True,
                    ),
                ),
                (
                    "max_distance",
                    models.FloatField(
                        blank=True,
                        help_text="Maximum L2 distance (null if no results)",
                        null=True,
                    ),
                ),
                (
                    "median_distance",
                    models.FloatField(
                        blank=True,
                        help_text="Median L2 distance (null if no results)",
                        null=True,
                    ),
                ),
                (
                    "user",
                    models.ForeignKey(
                        blank=True,
                        help_text="User who performed the search (null for unauthenticated users)",
                        null=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="semantic_searches",
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
            ],
            options={
                # Newest searches first by default.
                "ordering": ["-timestamp"],
                # Index names are fixed by this migration; keep them verbatim.
                "indexes": [
                    models.Index(
                        fields=["-timestamp"],
                        name="api_semanti_timesta_0b5730_idx",
                    ),
                    models.Index(
                        fields=["user", "-timestamp"],
                        name="api_semanti_user_id_e11ecb_idx",
                    ),
                ],
            },
        ),
    ]
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import uuid
2+
3+
from django.db import models
4+
from django.conf import settings
5+
6+
class SemanticSearchUsage(models.Model):
    """
    One record per embedding search, capturing what was asked and how it ran.

    Each row holds the query text and optional document filters, the time
    spent encoding the query and querying the database, the number of
    results requested and returned, and summary L2-distance statistics.
    """

    # --- Identity and request details ---
    guid = models.UUIDField(unique=True, default=uuid.uuid4, editable=False)
    timestamp = models.DateTimeField(auto_now_add=True)
    query_text = models.TextField(
        blank=True,
        null=True,
        help_text="The search query text",
    )
    document_name = models.TextField(
        blank=True,
        null=True,
        help_text="Document name filter if used",
    )
    document_guid = models.UUIDField(
        blank=True,
        null=True,
        help_text="Document GUID filter if used",
    )
    num_results_requested = models.IntegerField(
        default=10,
        help_text="Number of results requested",
    )
    # NOTE(review): CASCADE removes these usage rows when the user is deleted;
    # confirm that is intended for long-term cost tracking.
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        on_delete=models.CASCADE,
        related_name="semantic_searches",
        null=True,
        blank=True,
        help_text="User who performed the search (null for unauthenticated users)",
    )

    # --- Performance metrics ---
    encoding_time = models.FloatField(
        help_text="Time to encode query in seconds",
    )
    db_query_time = models.FloatField(
        help_text="Time for database query in seconds",
    )

    # --- Result statistics (distances are null when nothing was returned) ---
    num_results_returned = models.IntegerField(
        help_text="Number of results returned",
    )
    min_distance = models.FloatField(
        null=True,
        blank=True,
        help_text="Minimum L2 distance (null if no results)",
    )
    max_distance = models.FloatField(
        null=True,
        blank=True,
        help_text="Maximum L2 distance (null if no results)",
    )
    median_distance = models.FloatField(
        null=True,
        blank=True,
        help_text="Median L2 distance (null if no results)",
    )

    class Meta:
        # Newest searches first by default.
        ordering = ["-timestamp"]
        indexes = [
            models.Index(fields=["-timestamp"]),
            models.Index(fields=["user", "-timestamp"]),
        ]

    def __str__(self):
        """Return a one-line summary: who searched, when, and total seconds."""
        elapsed = self.encoding_time + self.db_query_time
        if self.user:
            who = self.user.email
        else:
            who = "Anonymous"
        return f"Search by {who} at {self.timestamp} ({elapsed:.3f}s)"

server/api/services/embedding_services.py

Lines changed: 53 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
1+
import time
2+
import logging
3+
from statistics import median
4+
15
from django.db.models import Q
26
from pgvector.django import L2Distance
37

48
from .sentencetTransformer_model import TransformerModel
5-
6-
# Adjust import path as needed
79
from ..models.model_embeddings import Embeddings
10+
from ..models.model_search_usage import SemanticSearchUsage
811

12+
logger = logging.getLogger(__name__)
913

1014
def get_closest_embeddings(
1115
user, message_data, document_name=None, guid=None, num_results=10
@@ -38,9 +42,14 @@ def get_closest_embeddings(
3842
- file_id: GUID of the source file
3943
"""
4044

45+
encoding_start = time.time()
4146
transformerModel = TransformerModel.get_instance().model
4247
embedding_message = transformerModel.encode(message_data)
48+
encoding_time = time.time() - encoding_start
4349

50+
db_query_start = time.time()
51+
52+
# Django QuerySets are lazily evaluated
4453
if user.is_authenticated:
4554
# User sees their own files + files uploaded by superusers
4655
closest_embeddings_query = (
@@ -62,18 +71,19 @@ def get_closest_embeddings(
6271
.order_by("distance")
6372
)
6473

65-
# Filter by GUID if provided, otherwise filter by document name if provided
74+
# Filtering to a document GUID takes precedence over a document name
6675
if guid:
6776
closest_embeddings_query = closest_embeddings_query.filter(
6877
upload_file__guid=guid
6978
)
7079
elif document_name:
7180
closest_embeddings_query = closest_embeddings_query.filter(name=document_name)
7281

73-
# Slice the results to limit to num_results
82+
# Slicing is equivalent to SQL's LIMIT clause
7483
closest_embeddings_query = closest_embeddings_query[:num_results]
7584

76-
# Format the results to be returned
85+
# Iterating evaluates the QuerySet and hits the database
86+
# TODO: Research improving the query evaluation performance
7787
results = [
7888
{
7989
"name": obj.name,
@@ -86,4 +96,42 @@ def get_closest_embeddings(
8696
for obj in closest_embeddings_query
8797
]
8898

99+
db_query_time = time.time() - db_query_start
100+
101+
try:
102+
# Handle user having no uploaded docs or doc filtering returning no matches
103+
if results:
104+
distances = [r["distance"] for r in results]
105+
SemanticSearchUsage.objects.create(
106+
query_text=message_data,
107+
user=user if (user and user.is_authenticated) else None,
108+
document_guid=guid,
109+
document_name=document_name,
110+
num_results_requested=num_results,
111+
encoding_time=encoding_time,
112+
db_query_time=db_query_time,
113+
num_results_returned=len(results),
114+
max_distance=max(distances),
115+
median_distance=median(distances),
116+
min_distance=min(distances)
117+
)
118+
else:
119+
logger.warning("Semantic search returned no results")
120+
121+
SemanticSearchUsage.objects.create(
122+
query_text=message_data,
123+
user=user if (user and user.is_authenticated) else None,
124+
document_guid=guid,
125+
document_name=document_name,
126+
num_results_requested=num_results,
127+
encoding_time=encoding_time,
128+
db_query_time=db_query_time,
129+
num_results_returned=0,
130+
max_distance=None,
131+
median_distance=None,
132+
min_distance=None
133+
)
134+
except Exception as e:
135+
logger.error(f"Failed to create semantic search usage database record: {e}")
136+
89137
return results

0 commit comments

Comments
 (0)