Skip to content

Commit f8e4926

Browse files
committed
feat: Implement document saving with reindexing
- Updated the document saving endpoint to trigger reindexing after saving.
- Introduced a new Celery task for reindexing documents.
- Refactored the editor page to reflect the changes in the API endpoint and method.
1 parent 91bc344 commit f8e4926

File tree

4 files changed

+156
-83
lines changed

4 files changed

+156
-83
lines changed

surfsense_backend/app/celery_app.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ def parse_schedule_interval(interval: str) -> dict:
6464
"app.tasks.celery_tasks.connector_tasks",
6565
"app.tasks.celery_tasks.schedule_checker_task",
6666
"app.tasks.celery_tasks.blocknote_migration_tasks",
67+
"app.tasks.celery_tasks.document_reindex_tasks",
6768
],
6869
)
6970

surfsense_backend/app/routes/editor_routes.py

Lines changed: 23 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -99,103 +99,47 @@ async def get_editor_content(
9999
}
100100

101101

102-
@router.put("/documents/{document_id}/blocknote-content")
103-
async def update_blocknote_content(
102+
@router.post("/documents/{document_id}/save")
async def save_document(
    document_id: int,
    data: dict[str, Any],
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
):
    """
    Save BlockNote document and trigger reindexing.

    Called when user clicks 'Save & Exit'.

    Args:
        document_id: ID of the document to save.
        data: Request body; must contain a non-empty ``blocknote_document``.
        session: Async database session.
        user: Authenticated user; the document must belong to one of this
            user's search spaces.

    Returns:
        A dict with ``status``, ``document_id``, ``message`` and the
        ISO-formatted ``last_edited_at`` timestamp written by this call.

    Raises:
        HTTPException: 404 if no matching document is found for this user;
            400 if ``blocknote_document`` is missing or empty.
    """
    # Kept as a function-level import, as in the original.
    # NOTE(review): presumably avoids a circular import at module load — confirm.
    from app.tasks.celery_tasks.document_reindex_tasks import reindex_document_task

    # Verify ownership: the join restricts the lookup to the caller's search spaces.
    result = await session.execute(
        select(Document)
        .join(SearchSpace)
        .filter(Document.id == document_id, SearchSpace.user_id == user.id)
    )
    document = result.scalars().first()

    if not document:
        raise HTTPException(status_code=404, detail="Document not found")

    blocknote_document = data.get("blocknote_document")
    if not blocknote_document:
        raise HTTPException(status_code=400, detail="blocknote_document is required")

    # Keep the timestamp in a local so the response does not have to read an
    # ORM attribute after commit. If the session expires objects on commit,
    # that read would trigger implicit IO, which AsyncSession does not allow.
    edited_at = datetime.now(UTC)

    # Save BlockNote document and flag it as pending reindex.
    document.blocknote_document = blocknote_document
    document.last_edited_at = edited_at
    document.content_needs_reindexing = True

    await session.commit()

    # Queue reindex task only after the commit, so the worker reads the saved content.
    reindex_document_task.delay(document_id, str(user.id))

    return {
        "status": "saved",
        "document_id": document_id,
        "message": "Document saved and will be reindexed in the background",
        "last_edited_at": edited_at.isoformat(),
    }
surfsense_backend/app/tasks/celery_tasks/document_reindex_tasks.py

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
"""Celery tasks for reindexing edited documents."""
2+
3+
import logging
4+
5+
from sqlalchemy import select
6+
from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
7+
from sqlalchemy.pool import NullPool
8+
from sqlalchemy import delete
9+
from sqlalchemy.orm import selectinload
10+
11+
from app.celery_app import celery_app
12+
from app.config import config
13+
from app.db import Document
14+
from app.utils.blocknote_converter import convert_blocknote_to_markdown
15+
from app.utils.document_converters import (
16+
create_document_chunks,
17+
generate_document_summary,
18+
)
19+
from app.services.llm_service import get_user_long_context_llm
20+
21+
logger = logging.getLogger(__name__)
22+
23+
24+
def get_celery_session_maker():
    """Build an async session factory for use inside Celery tasks.

    Each call creates a new async engine from ``config.DATABASE_URL`` using
    ``NullPool`` and returns an ``async_sessionmaker`` bound to it with
    ``expire_on_commit=False``.
    """
    task_engine = create_async_engine(
        config.DATABASE_URL,
        echo=False,
        poolclass=NullPool,
    )
    session_factory = async_sessionmaker(task_engine, expire_on_commit=False)
    return session_factory
32+
33+
34+
@celery_app.task(name="reindex_document", bind=True)
def reindex_document_task(self, document_id: int, user_id: str):
    """
    Celery task to reindex a document after editing.

    Synchronous entry point that drives the async ``_reindex_document``
    coroutine to completion on a fresh event loop.

    Args:
        document_id: ID of document to reindex
        user_id: ID of user who edited the document
    """
    import asyncio

    # asyncio.run creates a new loop, runs the coroutine, shuts down async
    # generators, closes the loop and unsets it as the current loop, so no
    # closed loop is left installed on the worker thread between tasks.
    asyncio.run(_reindex_document(document_id, user_id))
52+
53+
54+
async def _reindex_document(document_id: int, user_id: str):
    """Async function to reindex a document.

    Loads the document, converts its BlockNote content to markdown, replaces
    its chunks with freshly generated ones, regenerates the summary and
    embedding, and clears ``content_needs_reindexing``. All changes are
    committed together at the end.

    The function logs and returns without changing anything when the document
    is missing, has no BlockNote content, cannot be converted to markdown, or
    no long-context LLM is available for the user.

    Args:
        document_id: ID of the document to reindex.
        user_id: ID of the user who edited the document; used to look up the
            LLM for summary generation.

    Raises:
        Exception: Any error raised inside the session block is logged, the
            transaction is rolled back, and the error is re-raised.
    """
    from app.db import Chunk

    async with get_celery_session_maker()() as session:
        try:
            # Get document
            result = await session.execute(
                select(Document)
                .options(selectinload(Document.chunks))  # Eagerly load chunks
                .where(Document.id == document_id)
            )
            document = result.scalars().first()

            if not document:
                logger.error("Document %s not found", document_id)
                return

            if not document.blocknote_document:
                logger.warning("Document %s has no BlockNote content", document_id)
                return

            logger.info("Reindexing document %s (%s)", document_id, document.title)

            # 1. Convert BlockNote → Markdown
            markdown_content = await convert_blocknote_to_markdown(
                document.blocknote_document
            )

            if not markdown_content:
                logger.error("Failed to convert document %s to markdown", document_id)
                return

            # 2. Resolve the LLM before touching existing chunks, so a missing
            #    LLM configuration stops the task before any data is modified.
            user_llm = await get_user_long_context_llm(
                session, user_id, document.search_space_id
            )

            if not user_llm:
                logger.error(
                    "No LLM configured for summary generation; "
                    "cannot reindex document %s",
                    document_id,
                )
                return

            # 3. Delete old chunks explicitly
            await session.execute(
                delete(Chunk).where(Chunk.document_id == document_id)
            )
            await session.flush()  # Ensure old chunks are deleted

            # 4. Create new chunks and attach them to this document
            new_chunks = await create_document_chunks(markdown_content)
            for chunk in new_chunks:
                chunk.document_id = document_id
            session.add_all(new_chunks)

            logger.info(
                "Created %s chunks for document %s", len(new_chunks), document_id
            )

            # 5. Regenerate summary
            document_metadata = {
                "title": document.title,
                "document_type": document.document_type.value,
            }

            summary_content, summary_embedding = await generate_document_summary(
                markdown_content, user_llm, document_metadata
            )

            # 6. Update document
            document.content = summary_content
            document.embedding = summary_embedding
            document.content_needs_reindexing = False

            await session.commit()

            logger.info("Successfully reindexed document %s", document_id)

        except Exception as e:
            # Undo the chunk deletion and any partial inserts, then let the
            # error propagate to the caller.
            await session.rollback()
            logger.exception("Error reindexing document %s: %s", document_id, e)
            raise

surfsense_web/app/dashboard/[search_space_id]/editor/[documentId]/page.tsx

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -112,11 +112,11 @@ export default function EditorPage() {
112112

113113
setSaving(true);
114114
try {
115-
// Save blocknote_document to database (without finalizing/reindexing)
115+
// Save blocknote_document and trigger reindexing in background
116116
const response = await fetch(
117-
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/documents/${documentId}/blocknote-content`,
117+
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/documents/${documentId}/save`,
118118
{
119-
method: "PUT",
119+
method: "POST",
120120
headers: {
121121
"Content-Type": "application/json",
122122
Authorization: `Bearer ${token}`,
@@ -133,7 +133,7 @@ export default function EditorPage() {
133133
}
134134

135135
setHasUnsavedChanges(false);
136-
toast.success("Document saved successfully");
136+
toast.success("Document saved! Reindexing in background...");
137137

138138
// Small delay before redirect to show success message
139139
setTimeout(() => {

0 commit comments

Comments
 (0)