Skip to content

Commit 93d486d

Browse files
committed
revert: faulty dedup code
1 parent c882aac commit 93d486d

File tree

2 files changed

+16
-29
lines changed

2 files changed

+16
-29
lines changed

backend/open_webui/config.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1714,7 +1714,7 @@ class BannerModel(BaseModel):
17141714
- Respond in the same language as the user's query.
17151715
- If the context is unreadable or of poor quality, inform the user and provide the best possible answer.
17161716
- If the answer isn't present in the context but you possess the knowledge, explain this to the user and provide the answer using your own understanding.
1717-
- **Only include inline citations using [source_id] when a <source_id> tag is explicitly provided in the context.**
1717+
- **Only include inline citations using [source_id] (e.g., [1], [2]) when a `<source_id>` tag is explicitly provided in the context.**
17181718
- Do not cite if the <source_id> tag is not provided in the context.
17191719
- Do not use XML tags in your response.
17201720
- Ensure citations are concise and directly related to the information provided.

backend/open_webui/retrieval/utils.py

Lines changed: 15 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,8 @@
1414

1515
from open_webui.config import VECTOR_DB
1616
from open_webui.retrieval.vector.connector import VECTOR_DB_CLIENT
17-
from open_webui.utils.misc import get_last_user_message
17+
from open_webui.utils.misc import get_last_user_message, calculate_sha256_string
18+
1819
from open_webui.models.users import UserModel
1920

2021
from open_webui.env import (
@@ -178,45 +179,31 @@ def merge_and_sort_query_results(
178179
combined_distances = []
179180
combined_documents = []
180181
combined_metadatas = []
181-
combined_ids = []
182182

183183
for data in query_results:
184184
combined_distances.extend(data["distances"][0])
185185
combined_documents.extend(data["documents"][0])
186186
combined_metadatas.extend(data["metadatas"][0])
187-
# DISTINCT(chunk_id,file_id) - in case if id (chunk_ids) become ordinals
188-
combined_ids.extend(
189-
[
190-
f"{id}-{meta['file_id']}"
191-
for id, meta in zip(data["ids"][0], data["metadatas"][0])
192-
]
193-
)
194187

195-
# Create a list of tuples (distance, document, metadata, ids)
196-
combined = list(
197-
zip(combined_distances, combined_documents, combined_metadatas, combined_ids)
198-
)
188+
# Create a list of tuples (distance, document, metadata)
189+
combined = list(zip(combined_distances, combined_documents, combined_metadatas))
199190

200191
# Sort the list based on distances
201192
combined.sort(key=lambda x: x[0], reverse=reverse)
202193

203-
sorted_distances = []
204-
sorted_documents = []
205-
sorted_metadatas = []
206-
# Otherwise we don't have anything :-(
207-
if combined:
194+
# We don't have anything :-(
195+
if not combined:
196+
sorted_distances = []
197+
sorted_documents = []
198+
sorted_metadatas = []
199+
else:
208200
# Unzip the sorted list
209-
all_distances, all_documents, all_metadatas, all_ids = zip(*combined)
210-
seen_ids = set()
201+
sorted_distances, sorted_documents, sorted_metadatas = zip(*combined)
202+
211203
# Slicing the lists to include only k elements
212-
for index, id in enumerate(all_ids):
213-
if id not in seen_ids:
214-
sorted_distances.append(all_distances[index])
215-
sorted_documents.append(all_documents[index])
216-
sorted_metadatas.append(all_metadatas[index])
217-
seen_ids.add(id)
218-
if len(sorted_distances) >= k:
219-
break
204+
sorted_distances = list(sorted_distances)[:k]
205+
sorted_documents = list(sorted_documents)[:k]
206+
sorted_metadatas = list(sorted_metadatas)[:k]
220207

221208
# Create the output dictionary
222209
result = {

0 commit comments

Comments
 (0)