
Commit e419702

fix: run ruff formatter to fix code quality
1 parent: b98c312

File tree

4 files changed (+77 lines, -66 lines):

surfsense_backend/alembic/versions/38_add_blocknote_fields_to_documents.py
surfsense_backend/app/routes/editor_routes.py
surfsense_backend/app/tasks/celery_tasks/blocknote_migration_tasks.py
surfsense_backend/app/tasks/celery_tasks/document_reindex_tasks.py

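This commit is a pure formatting pass: Ruff's code formatter was run over the backend (presumably something like "ruff format surfsense_backend/"; the exact invocation is not recorded in the commit), with no behavior changes. The diffs below fall into a handful of mechanical patterns: trailing whitespace stripped from otherwise-blank lines (which shows up as paired -/+ blank lines), statements over the formatter's default 88-column limit split across multiple lines, one short multi-line call joined back onto a single line, and a missing trailing comma added inside a multi-line dict.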

surfsense_backend/alembic/versions/38_add_blocknote_fields_to_documents.py

Lines changed: 7 additions & 3 deletions
@@ -48,15 +48,19 @@ def upgrade() -> None:
         from app.tasks.celery_tasks.blocknote_migration_tasks import (
             populate_blocknote_for_documents_task,
         )
-
+
         # Queue the task to run asynchronously
         populate_blocknote_for_documents_task.apply_async()
-        print("✓ Queued Celery task to populate blocknote_document for existing documents")
+        print(
+            "✓ Queued Celery task to populate blocknote_document for existing documents"
+        )
     except Exception as e:
         # If Celery is not available or task queueing fails, log but don't fail the migration
         print(f"⚠ Warning: Could not queue blocknote population task: {e}")
         print(" You can manually trigger it later with:")
-        print(" celery -A app.celery_app call app.tasks.celery_tasks.blocknote_migration_tasks.populate_blocknote_for_documents_task")
+        print(
+            " celery -A app.celery_app call app.tasks.celery_tasks.blocknote_migration_tasks.populate_blocknote_for_documents_task"
+        )
 
 
 def downgrade() -> None:
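The fallback message above gives a celery CLI invocation for triggering the backfill by hand. The same thing can be done from Python; a minimal sketch, assuming a worker is connected to the same broker (calling apply_async() with no arguments mirrors exactly what upgrade() does):

    # One-off script: queue the backfill task manually if the migration
    # could not reach the Celery broker at upgrade time.
    from app.tasks.celery_tasks.blocknote_migration_tasks import (
        populate_blocknote_for_documents_task,
    )

    # With no document_ids, the task processes every document whose
    # blocknote_document column is still NULL (see the task file below).
    populate_blocknote_for_documents_task.apply_async()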

surfsense_backend/app/routes/editor_routes.py

Lines changed: 17 additions & 17 deletions
@@ -33,7 +33,7 @@ async def get_editor_content(
     attempts to generate it from chunks (lazy migration).
     """
     from sqlalchemy.orm import selectinload
-
+
     result = await session.execute(
         select(Document)
         .options(selectinload(Document.chunks))
@@ -58,39 +58,39 @@ async def get_editor_content(
 
     # Lazy migration: Try to generate blocknote_document from chunks
     from app.utils.blocknote_converter import convert_markdown_to_blocknote
-
+
     chunks = sorted(document.chunks, key=lambda c: c.id)
-
+
     if not chunks:
         raise HTTPException(
             status_code=400,
             detail="This document has no chunks and cannot be edited. Please re-upload to enable editing.",
         )
-
+
     # Reconstruct markdown from chunks
     markdown_content = "\n\n".join(chunk.content for chunk in chunks)
-
+
     if not markdown_content.strip():
         raise HTTPException(
             status_code=400,
             detail="This document has empty content and cannot be edited.",
         )
-
+
     # Convert to BlockNote
     blocknote_json = await convert_markdown_to_blocknote(markdown_content)
-
+
     if not blocknote_json:
         raise HTTPException(
             status_code=500,
             detail="Failed to convert document to editable format. Please try again later.",
         )
-
+
     # Save the generated blocknote_document (lazy migration)
     document.blocknote_document = blocknote_json
     document.content_needs_reindexing = False
     document.last_edited_at = None
     await session.commit()
-
+
     return {
         "document_id": document.id,
         "title": document.title,
@@ -111,35 +111,35 @@ async def save_document(
     Called when user clicks 'Save & Exit'.
     """
     from app.tasks.celery_tasks.document_reindex_tasks import reindex_document_task
-
+
     # Verify ownership
     result = await session.execute(
         select(Document)
         .join(SearchSpace)
         .filter(Document.id == document_id, SearchSpace.user_id == user.id)
     )
     document = result.scalars().first()
-
+
     if not document:
         raise HTTPException(status_code=404, detail="Document not found")
-
+
     blocknote_document = data.get("blocknote_document")
     if not blocknote_document:
         raise HTTPException(status_code=400, detail="blocknote_document is required")
-
+
     # Save BlockNote document
     document.blocknote_document = blocknote_document
     document.last_edited_at = datetime.now(UTC)
     document.content_needs_reindexing = True
-
+
     await session.commit()
-
+
     # Queue reindex task
     reindex_document_task.delay(document_id, str(user.id))
-
+
     return {
         "status": "saved",
         "document_id": document_id,
         "message": "Document saved and will be reindexed in the background",
-        "last_edited_at": document.last_edited_at.isoformat()
+        "last_edited_at": document.last_edited_at.isoformat(),
     }
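Note the asymmetry between the two routes: the lazy-migration path in get_editor_content sets content_needs_reindexing = False, since the BlockNote document was just derived from the existing chunks and the index is already current, while save_document sets it to True and queues reindex_document_task because the user changed the content. As a usage sketch of the save flow from a client's perspective (the endpoint URL, HTTP method, and auth header here are illustrative assumptions; the router decorators are not part of this diff):

    import httpx

    # Hypothetical URL and token; substitute the actual editor route and auth.
    resp = httpx.put(
        "http://localhost:8000/editor/123",
        json={"blocknote_document": [{"type": "paragraph", "content": "Hello"}]},
        headers={"Authorization": "Bearer <token>"},
    )
    # The route responds with status, document_id, message, and last_edited_at.
    print(resp.json()["last_edited_at"])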

surfsense_backend/app/tasks/celery_tasks/blocknote_migration_tasks.py

Lines changed: 34 additions & 26 deletions
@@ -36,7 +36,7 @@ def populate_blocknote_for_documents_task(
 ):
     """
     Celery task to populate blocknote_document for existing documents.
-
+
     Args:
         document_ids: Optional list of specific document IDs to process.
             If None, processes all documents with blocknote_document IS NULL.
@@ -60,7 +60,7 @@ async def _populate_blocknote_for_documents(
 ):
     """
     Async function to populate blocknote_document for documents.
-
+
     Args:
         document_ids: Optional list of specific document IDs to process
         batch_size: Number of documents to process per batch
@@ -69,75 +69,83 @@ async def _populate_blocknote_for_documents(
         try:
             # Build query for documents that need blocknote_document populated
             query = select(Document).where(Document.blocknote_document.is_(None))
-
+
             # If specific document IDs provided, filter by them
             if document_ids:
                 query = query.where(Document.id.in_(document_ids))
-
+
             # Load chunks relationship to avoid N+1 queries
             query = query.options(selectinload(Document.chunks))
-
+
             # Execute query
             result = await session.execute(query)
             documents = result.scalars().all()
-
+
             total_documents = len(documents)
             logger.info(f"Found {total_documents} documents to process")
-
+
             if total_documents == 0:
                 logger.info("No documents to process")
                 return
-
+
             # Process documents in batches
             processed = 0
             failed = 0
-
+
             for i in range(0, total_documents, batch_size):
                 batch = documents[i : i + batch_size]
-                logger.info(f"Processing batch {i // batch_size + 1}: documents {i+1}-{min(i+batch_size, total_documents)}")
-
+                logger.info(
+                    f"Processing batch {i // batch_size + 1}: documents {i + 1}-{min(i + batch_size, total_documents)}"
+                )
+
                 for document in batch:
                     try:
                         # Use preloaded chunks from selectinload - no need to query again
                         chunks = sorted(document.chunks, key=lambda c: c.id)
-
+
                         if not chunks:
                             logger.warning(
                                 f"Document {document.id} ({document.title}) has no chunks, skipping"
                             )
                             failed += 1
                             continue
-
+
                         # Reconstruct markdown by concatenating chunk contents
-                        markdown_content = "\n\n".join(chunk.content for chunk in chunks)
-
+                        markdown_content = "\n\n".join(
+                            chunk.content for chunk in chunks
+                        )
+
                         if not markdown_content or not markdown_content.strip():
                             logger.warning(
                                 f"Document {document.id} ({document.title}) has empty markdown content, skipping"
                             )
                             failed += 1
                             continue
-
+
                         # Convert markdown to BlockNote JSON
-                        blocknote_json = await convert_markdown_to_blocknote(markdown_content)
-
+                        blocknote_json = await convert_markdown_to_blocknote(
+                            markdown_content
+                        )
+
                         if not blocknote_json:
                             logger.warning(
                                 f"Failed to convert markdown to BlockNote for document {document.id} ({document.title})"
                             )
                             failed += 1
                             continue
-
+
                         # Update document with blocknote_document (other fields already have correct defaults)
                         document.blocknote_document = blocknote_json
-
+
                         processed += 1
-
+
                         # Commit every batch_size documents to avoid long transactions
                         if processed % batch_size == 0:
                             await session.commit()
-                            logger.info(f"Committed batch: {processed} documents processed so far")
-
+                            logger.info(
+                                f"Committed batch: {processed} documents processed so far"
+                            )
+
                     except Exception as e:
                         logger.error(
                             f"Error processing document {document.id} ({document.title}): {e}",
@@ -146,15 +154,15 @@ async def _populate_blocknote_for_documents(
                         failed += 1
                         # Continue with next document instead of failing entire batch
                        continue
-
+
                # Commit remaining changes in the batch
                await session.commit()
                logger.info(f"Completed batch {i // batch_size + 1}")
-
+
            logger.info(
                f"Migration complete: {processed} documents processed, {failed} failed"
            )
-
+
        except Exception as e:
            await session.rollback()
            logger.error(f"Error in blocknote migration task: {e}", exc_info=True)

surfsense_backend/app/tasks/celery_tasks/document_reindex_tasks.py

Lines changed: 19 additions & 20 deletions
@@ -35,7 +35,7 @@ def get_celery_session_maker():
 def reindex_document_task(self, document_id: int, user_id: str):
     """
     Celery task to reindex a document after editing.
-
+
     Args:
         document_id: ID of document to reindex
         user_id: ID of user who edited the document
@@ -62,66 +62,65 @@ async def _reindex_document(document_id: int, user_id: str):
             .where(Document.id == document_id)
         )
         document = result.scalars().first()
-
+
         if not document:
             logger.error(f"Document {document_id} not found")
             return
-
+
         if not document.blocknote_document:
             logger.warning(f"Document {document_id} has no BlockNote content")
             return
-
+
         logger.info(f"Reindexing document {document_id} ({document.title})")
-
+
         # 1. Convert BlockNote → Markdown
         markdown_content = await convert_blocknote_to_markdown(
             document.blocknote_document
         )
-
+
         if not markdown_content:
             logger.error(f"Failed to convert document {document_id} to markdown")
             return
-
+
         # 2. Delete old chunks explicitly
         from app.db import Chunk
-        await session.execute(
-            delete(Chunk).where(Chunk.document_id == document_id)
-        )
+
+        await session.execute(delete(Chunk).where(Chunk.document_id == document_id))
         await session.flush()  # Ensure old chunks are deleted
-
+
         # 3. Create new chunks
         new_chunks = await create_document_chunks(markdown_content)
-
+
         # 4. Add new chunks to session
         for chunk in new_chunks:
             chunk.document_id = document_id
             session.add(chunk)
-
+
         logger.info(f"Created {len(new_chunks)} chunks for document {document_id}")
-
+
         # 5. Regenerate summary
         user_llm = await get_user_long_context_llm(
             session, user_id, document.search_space_id
         )
-
+
         document_metadata = {
             "title": document.title,
             "document_type": document.document_type.value,
         }
-
+
         summary_content, summary_embedding = await generate_document_summary(
             markdown_content, user_llm, document_metadata
        )
-
+
         # 6. Update document
         document.content = summary_content
         document.embedding = summary_embedding
         document.content_needs_reindexing = False
-
+
         await session.commit()
-
+
         logger.info(f"Successfully reindexed document {document_id}")
-
+
     except Exception as e:
         await session.rollback()
         logger.error(f"Error reindexing document {document_id}: {e}", exc_info=True)
