|
14 | 14 |
|
15 | 15 | from open_webui.config import VECTOR_DB |
16 | 16 | from open_webui.retrieval.vector.connector import VECTOR_DB_CLIENT |
17 | | -from open_webui.utils.misc import get_last_user_message |
| 17 | +from open_webui.utils.misc import get_last_user_message, calculate_sha256_string |
| 18 | + |
18 | 19 | from open_webui.models.users import UserModel |
19 | 20 |
|
20 | 21 | from open_webui.env import ( |
@@ -178,45 +179,31 @@ def merge_and_sort_query_results( |
178 | 179 | combined_distances = [] |
179 | 180 | combined_documents = [] |
180 | 181 | combined_metadatas = [] |
181 | | - combined_ids = [] |
182 | 182 |
|
183 | 183 | for data in query_results: |
184 | 184 | combined_distances.extend(data["distances"][0]) |
185 | 185 | combined_documents.extend(data["documents"][0]) |
186 | 186 | combined_metadatas.extend(data["metadatas"][0]) |
187 | | - # DISTINCT(chunk_id,file_id) - in case if id (chunk_ids) become ordinals |
188 | | - combined_ids.extend( |
189 | | - [ |
190 | | - f"{id}-{meta['file_id']}" |
191 | | - for id, meta in zip(data["ids"][0], data["metadatas"][0]) |
192 | | - ] |
193 | | - ) |
194 | 187 |
|
195 | | - # Create a list of tuples (distance, document, metadata, ids) |
196 | | - combined = list( |
197 | | - zip(combined_distances, combined_documents, combined_metadatas, combined_ids) |
198 | | - ) |
| 188 | + # Create a list of tuples (distance, document, metadata) |
| 189 | + combined = list(zip(combined_distances, combined_documents, combined_metadatas)) |
199 | 190 |
|
200 | 191 | # Sort the list based on distances |
201 | 192 | combined.sort(key=lambda x: x[0], reverse=reverse) |
202 | 193 |
|
203 | | - sorted_distances = [] |
204 | | - sorted_documents = [] |
205 | | - sorted_metadatas = [] |
206 | | - # Otherwise we don't have anything :-( |
207 | | - if combined: |
| 194 | + # We don't have anything :-( |
| 195 | + if not combined: |
| 196 | + sorted_distances = [] |
| 197 | + sorted_documents = [] |
| 198 | + sorted_metadatas = [] |
| 199 | + else: |
208 | 200 | # Unzip the sorted list |
209 | | - all_distances, all_documents, all_metadatas, all_ids = zip(*combined) |
210 | | - seen_ids = set() |
| 201 | + sorted_distances, sorted_documents, sorted_metadatas = zip(*combined) |
| 202 | + |
211 | 203 | # Slicing the lists to include only k elements |
212 | | - for index, id in enumerate(all_ids): |
213 | | - if id not in seen_ids: |
214 | | - sorted_distances.append(all_distances[index]) |
215 | | - sorted_documents.append(all_documents[index]) |
216 | | - sorted_metadatas.append(all_metadatas[index]) |
217 | | - seen_ids.add(id) |
218 | | - if len(sorted_distances) >= k: |
219 | | - break |
| 204 | + sorted_distances = list(sorted_distances)[:k] |
| 205 | + sorted_documents = list(sorted_documents)[:k] |
| 206 | + sorted_metadatas = list(sorted_metadatas)[:k] |
220 | 207 |
|
221 | 208 | # Create the output dictionary |
222 | 209 | result = { |
|
0 commit comments