Skip to content

Commit 7108a7b

Browse files
phernandez and claude committed
fix: resolve sync race conditions and search errors
- Add IntegrityError handling in entity_service.create_entity_from_markdown for file_path/permalink constraint violations
- Add IntegrityError handling in sync_service.sync_regular_file for concurrent sync race conditions
- Fix FTS "unknown special query" error when searching for wildcard "*" patterns
- Add comprehensive test coverage for race condition edge cases and error handling
- Gracefully handle concurrent sync processes with fallback to update operations

Fixes sync errors from beta testing including:
- "UNIQUE constraint failed: entity.file_path"
- "UNIQUE constraint failed: entity.permalink"
- "unknown special query" FTS errors

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 35884ef commit 7108a7b

File tree

6 files changed

+400
-25
lines changed

6 files changed

+400
-25
lines changed

src/basic_memory/repository/search_repository.py

Lines changed: 18 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -237,19 +237,24 @@ async def search(
237237

238238
# Handle text search for title and content
239239
if search_text:
240-
# Check for explicit boolean operators - only detect them in proper boolean contexts
241-
has_boolean = any(op in f" {search_text} " for op in [" AND ", " OR ", " NOT "])
242-
243-
if has_boolean:
244-
# If boolean operators are present, use the raw query
245-
# No need to prepare it, FTS5 will understand the operators
246-
params["text"] = search_text
247-
conditions.append("(title MATCH :text OR content_stems MATCH :text)")
240+
# Skip FTS for wildcard-only queries that would cause "unknown special query" errors
241+
if search_text.strip() == "*" or search_text.strip() == "":
242+
# For wildcard searches, don't add any text conditions - return all results
243+
pass
248244
else:
249-
# Standard search with term preparation
250-
processed_text = self._prepare_search_term(search_text.strip())
251-
params["text"] = processed_text
252-
conditions.append("(title MATCH :text OR content_stems MATCH :text)")
245+
# Check for explicit boolean operators - only detect them in proper boolean contexts
246+
has_boolean = any(op in f" {search_text} " for op in [" AND ", " OR ", " NOT "])
247+
248+
if has_boolean:
249+
# If boolean operators are present, use the raw query
250+
# No need to prepare it, FTS5 will understand the operators
251+
params["text"] = search_text
252+
conditions.append("(title MATCH :text OR content_stems MATCH :text)")
253+
else:
254+
# Standard search with term preparation
255+
processed_text = self._prepare_search_term(search_text.strip())
256+
params["text"] = processed_text
257+
conditions.append("(title MATCH :text OR content_stems MATCH :text)")
253258

254259
# Handle title match search
255260
if title:
@@ -453,4 +458,4 @@ async def execute_query(
453458
end_time = time.perf_counter()
454459
elapsed_time = end_time - start_time
455460
logger.debug(f"Query executed successfully in {elapsed_time:.2f}s.")
456-
return result
461+
return result

src/basic_memory/services/entity_service.py

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -299,7 +299,16 @@ async def create_entity_from_markdown(
299299
# Mark as incomplete because we still need to add relations
300300
model.checksum = None
301301
# Repository will set project_id automatically
302-
return await self.repository.add(model)
302+
try:
303+
return await self.repository.add(model)
304+
except IntegrityError as e:
305+
# Handle race condition where entity was created by another process
306+
if "UNIQUE constraint failed: entity.file_path" in str(e) or "UNIQUE constraint failed: entity.permalink" in str(e):
307+
logger.info(f"Entity already exists for file_path={file_path} (file_path or permalink conflict), updating instead of creating")
308+
return await self.update_entity_and_observations(file_path, markdown)
309+
else:
310+
# Re-raise if it's a different integrity error
311+
raise
303312

304313
async def update_entity_and_observations(
305314
self, file_path: Path, markdown: EntityMarkdown

src/basic_memory/sync/sync_service.py

Lines changed: 34 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -364,18 +364,41 @@ async def sync_regular_file(self, path: str, new: bool = True) -> Tuple[Optional
364364
content_type = self.file_service.content_type(path)
365365

366366
file_path = Path(path)
367-
entity = await self.entity_repository.add(
368-
Entity(
369-
entity_type="file",
370-
file_path=path,
371-
checksum=checksum,
372-
title=file_path.name,
373-
created_at=created,
374-
updated_at=modified,
375-
content_type=content_type,
367+
try:
368+
entity = await self.entity_repository.add(
369+
Entity(
370+
entity_type="file",
371+
file_path=path,
372+
checksum=checksum,
373+
title=file_path.name,
374+
created_at=created,
375+
updated_at=modified,
376+
content_type=content_type,
377+
)
376378
)
377-
)
378-
return entity, checksum
379+
return entity, checksum
380+
except IntegrityError as e:
381+
# Handle race condition where entity was created by another process
382+
if "UNIQUE constraint failed: entity.file_path" in str(e):
383+
logger.info(f"Entity already exists for file_path={path}, updating instead of creating")
384+
# Treat as update instead of create
385+
entity = await self.entity_repository.get_by_file_path(path)
386+
if entity is None: # pragma: no cover
387+
logger.error(f"Entity not found after constraint violation, path={path}")
388+
raise ValueError(f"Entity not found after constraint violation: {path}")
389+
390+
updated = await self.entity_repository.update(
391+
entity.id, {"file_path": path, "checksum": checksum}
392+
)
393+
394+
if updated is None: # pragma: no cover
395+
logger.error(f"Failed to update entity, entity_id={entity.id}, path={path}")
396+
raise ValueError(f"Failed to update entity with ID {entity.id}")
397+
398+
return updated, checksum
399+
else:
400+
# Re-raise if it's a different integrity error
401+
raise
379402
else:
380403
entity = await self.entity_repository.get_by_file_path(path)
381404
if entity is None: # pragma: no cover

tests/repository/test_search_repository.py

Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -483,3 +483,37 @@ async def test_version_string_search_integration(self, search_repository, search
483483
# Test with other problematic patterns
484484
results3 = await search_repository.search(search_text="node.js version")
485485
assert isinstance(results3, list) # Should not crash
486+
487+
@pytest.mark.asyncio
488+
async def test_wildcard_only_search(self, search_repository, search_entity):
489+
"""Test that wildcard-only search '*' doesn't cause FTS5 errors (line 243 coverage)."""
490+
# Index an entity for testing
491+
search_row = SearchIndexRow(
492+
id=search_entity.id,
493+
type=SearchItemType.ENTITY.value,
494+
title="Test Entity",
495+
content_stems="test entity content",
496+
content_snippet="This is a test entity",
497+
permalink=search_entity.permalink,
498+
file_path=search_entity.file_path,
499+
entity_id=search_entity.id,
500+
metadata={"entity_type": search_entity.entity_type},
501+
created_at=search_entity.created_at,
502+
updated_at=search_entity.updated_at,
503+
project_id=search_repository.project_id,
504+
)
505+
506+
await search_repository.index_item(search_row)
507+
508+
# Test wildcard-only search - should not crash and should return results
509+
results = await search_repository.search(search_text="*")
510+
assert isinstance(results, list) # Should not crash
511+
assert len(results) >= 1 # Should return all results, including our test entity
512+
513+
# Test empty string search - should also not crash
514+
results_empty = await search_repository.search(search_text="")
515+
assert isinstance(results_empty, list) # Should not crash
516+
517+
# Test whitespace-only search
518+
results_whitespace = await search_repository.search(search_text=" ")
519+
assert isinstance(results_whitespace, list) # Should not crash

tests/services/test_entity_service.py

Lines changed: 103 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -869,6 +869,109 @@ async def test_edit_entity_with_observations_and_relations(
869869
assert new_rel.relation_type == "relates to"
870870

871871

872+
@pytest.mark.asyncio
873+
async def test_create_entity_from_markdown_race_condition_handling(
874+
entity_service: EntityService, file_service: FileService
875+
):
876+
"""Test that create_entity_from_markdown handles race condition with IntegrityError (lines 304-311)."""
877+
from unittest.mock import patch, AsyncMock
878+
from sqlalchemy.exc import IntegrityError
879+
880+
file_path = Path("test/race-condition.md")
881+
882+
# Create a mock EntityMarkdown object
883+
from basic_memory.markdown.schemas import EntityFrontmatter, EntityMarkdown as RealEntityMarkdown
884+
from datetime import datetime, timezone
885+
886+
frontmatter = EntityFrontmatter(metadata={"title": "Race Condition Test", "type": "test"})
887+
markdown = RealEntityMarkdown(
888+
frontmatter=frontmatter,
889+
observations=[],
890+
relations=[],
891+
created=datetime.now(timezone.utc),
892+
modified=datetime.now(timezone.utc)
893+
)
894+
895+
# Mock the repository.add to raise IntegrityError on first call, then succeed on second
896+
original_add = entity_service.repository.add
897+
original_update = entity_service.update_entity_and_observations
898+
899+
call_count = 0
900+
901+
async def mock_add(*args, **kwargs):
902+
nonlocal call_count
903+
call_count += 1
904+
if call_count == 1:
905+
# Simulate race condition - another process created the entity
906+
raise IntegrityError("UNIQUE constraint failed: entity.file_path", None, None)
907+
else:
908+
return await original_add(*args, **kwargs)
909+
910+
# Mock update method to return a dummy entity
911+
async def mock_update(*args, **kwargs):
912+
from basic_memory.models import Entity
913+
from datetime import datetime, timezone
914+
915+
return Entity(
916+
id=1,
917+
title="Race Condition Test",
918+
entity_type="test",
919+
file_path=str(file_path),
920+
permalink="test/race-condition-test",
921+
content_type="text/markdown",
922+
created_at=datetime.now(timezone.utc),
923+
updated_at=datetime.now(timezone.utc),
924+
)
925+
926+
with patch.object(entity_service.repository, 'add', side_effect=mock_add), \
927+
patch.object(entity_service, 'update_entity_and_observations', side_effect=mock_update) as mock_update_call:
928+
929+
# Call the method
930+
result = await entity_service.create_entity_from_markdown(file_path, markdown)
931+
932+
# Verify it handled the race condition gracefully
933+
assert result is not None
934+
assert result.title == "Race Condition Test"
935+
assert result.file_path == str(file_path)
936+
937+
# Verify that update_entity_and_observations was called as fallback
938+
mock_update_call.assert_called_once_with(file_path, markdown)
939+
940+
941+
@pytest.mark.asyncio
942+
async def test_create_entity_from_markdown_integrity_error_reraise(
943+
entity_service: EntityService, file_service: FileService
944+
):
945+
"""Test that create_entity_from_markdown re-raises IntegrityError for non-race-condition cases."""
946+
from unittest.mock import patch
947+
from sqlalchemy.exc import IntegrityError
948+
949+
file_path = Path("test/integrity-error.md")
950+
951+
# Create a mock EntityMarkdown object
952+
from basic_memory.markdown.schemas import EntityFrontmatter, EntityMarkdown as RealEntityMarkdown
953+
from datetime import datetime, timezone
954+
955+
frontmatter = EntityFrontmatter(metadata={"title": "Integrity Error Test", "type": "test"})
956+
markdown = RealEntityMarkdown(
957+
frontmatter=frontmatter,
958+
observations=[],
959+
relations=[],
960+
created=datetime.now(timezone.utc),
961+
modified=datetime.now(timezone.utc)
962+
)
963+
964+
# Mock the repository.add to raise a different IntegrityError (not file_path/permalink constraint)
965+
async def mock_add(*args, **kwargs):
966+
# Simulate a different constraint violation
967+
raise IntegrityError("UNIQUE constraint failed: entity.some_other_field", None, None)
968+
969+
with patch.object(entity_service.repository, 'add', side_effect=mock_add):
970+
# Should re-raise the IntegrityError since it's not a file_path/permalink constraint
971+
with pytest.raises(IntegrityError, match="UNIQUE constraint failed: entity.some_other_field"):
972+
await entity_service.create_entity_from_markdown(file_path, markdown)
973+
974+
872975
# Edge case tests for find_replace operation
873976
@pytest.mark.asyncio
874977
async def test_edit_entity_find_replace_not_found(entity_service: EntityService):

0 commit comments

Comments
 (0)