Skip to content

Commit 00b3583

Browse files
authored
fix: fix reindex not working due to unnecessary dupe check (open-webui#20857)
* Update retrieval.py
* Update knowledge.py
* Update retrieval.py
* Update knowledge.py
1 parent 4d9a7cc commit 00b3583

File tree

1 file changed

+10
-2
lines changed

1 file changed

+10
-2
lines changed

backend/open_webui/routers/retrieval.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1425,8 +1425,16 @@ def _get_docs_info(docs: list[Document]) -> str:
14251425
if result is not None and result.ids and len(result.ids) > 0:
14261426
existing_doc_ids = result.ids[0]
14271427
if existing_doc_ids:
1428-
log.info(f"Document with hash {metadata['hash']} already exists")
1429-
raise ValueError(ERROR_MESSAGES.DUPLICATE_CONTENT)
1428+
# Check if the existing document belongs to the same file
1429+
# If same file_id, this is a re-add/reindex - allow it
1430+
# If different file_id, this is a duplicate - block it
1431+
existing_file_id = None
1432+
if result.metadatas and result.metadatas[0]:
1433+
existing_file_id = result.metadatas[0][0].get("file_id")
1434+
1435+
if existing_file_id != metadata.get("file_id"):
1436+
log.info(f"Document with hash {metadata['hash']} already exists")
1437+
raise ValueError(ERROR_MESSAGES.DUPLICATE_CONTENT)
14301438

14311439
if split:
14321440
if request.app.state.config.ENABLE_MARKDOWN_HEADER_TEXT_SPLITTER:

0 commit comments

Comments
 (0)