Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""38_add_blocknote_fields_to_documents

Revision ID: 38
Revises: 37

"""
from collections.abc import Sequence

import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

from alembic import op

# revision identifiers, used by Alembic.
revision: str = '38'  # this migration's ID
down_revision: str | None = '37'  # the migration this one builds on
branch_labels: str | Sequence[str] | None = None  # no branch labels for this revision
depends_on: str | Sequence[str] | None = None  # no cross-branch dependencies


def upgrade() -> None:
    """Upgrade schema - Add BlockNote fields only."""
    # Columns backing BlockNote live editing on the documents table,
    # added in declaration order.
    blocknote_columns = (
        # Live BlockNote editor state; NULL until the document is first edited.
        sa.Column(
            'blocknote_document',
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=True,
        ),
        # Flag for background reindexing; defaults to false for existing rows.
        sa.Column(
            'content_needs_reindexing',
            sa.Boolean(),
            nullable=False,
            server_default=sa.false(),
        ),
        # Timestamp of the most recent edit; NULL when never edited.
        sa.Column('last_edited_at', sa.TIMESTAMP(timezone=True), nullable=True),
    )
    for column in blocknote_columns:
        op.add_column('documents', column)

def downgrade() -> None:
    """Downgrade schema - Remove BlockNote fields."""
    # Drop the BlockNote columns in reverse order of their creation.
    for column_name in (
        'last_edited_at',
        'content_needs_reindexing',
        'blocknote_document',
    ):
        op.drop_column('documents', column_name)
13 changes: 12 additions & 1 deletion surfsense_backend/app/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
UniqueConstraint,
text,
)
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, declared_attr, relationship

Expand Down Expand Up @@ -178,6 +178,17 @@ class Document(BaseModel, TimestampMixin):
content_hash = Column(String, nullable=False, index=True, unique=True)
unique_identifier_hash = Column(String, nullable=True, index=True, unique=True)
embedding = Column(Vector(config.embedding_model_instance.dimension))

# BlockNote live editing state (NULL when never edited)
blocknote_document = Column(JSONB, nullable=True)

# blocknote background reindex flag
content_needs_reindexing = Column(
Boolean, nullable=False, default=False, server_default=text("false")
)

# Track when blocknote document was last edited
last_edited_at = Column(TIMESTAMP(timezone=True), nullable=True)

search_space_id = Column(
Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
Expand Down
2 changes: 2 additions & 0 deletions surfsense_backend/app/routes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
)
from .chats_routes import router as chats_router
from .documents_routes import router as documents_router
from .editor_routes import router as editor_router
from .google_calendar_add_connector_route import (
router as google_calendar_add_connector_router,
)
Expand All @@ -21,6 +22,7 @@
router = APIRouter()

router.include_router(search_spaces_router)
router.include_router(editor_router)
router.include_router(documents_router)
router.include_router(podcasts_router)
router.include_router(chats_router)
Expand Down
161 changes: 161 additions & 0 deletions surfsense_backend/app/routes/editor_routes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
"""
Editor routes for BlockNote document editing.
"""
from datetime import UTC, datetime
from typing import Any

from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from app.db import Document, SearchSpace, User, get_async_session
from app.users import current_active_user
from app.utils.blocknote_converter import (
convert_blocknote_to_markdown,
convert_markdown_to_blocknote,
)

router = APIRouter()


@router.get("/documents/{document_id}/editor-content")
async def get_editor_content(
    document_id: int,
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
):
    """
    Get document content for editing.

    Returns the stored BlockNote JSON document together with the title and
    last-edit timestamp. Documents ingested before BlockNote support have
    no stored editor state, and the full document cannot be reconstructed
    from the summary held in ``content``, so those are rejected with a 400.

    Raises:
        HTTPException: 404 when the document does not exist or is not owned
            by the current user; 400 when the document has no BlockNote state.
    """
    # Ownership is enforced by joining through the document's search space
    # and filtering on the requesting user's id.
    result = await session.execute(
        select(Document)
        .join(SearchSpace)
        .filter(Document.id == document_id, SearchSpace.user_id == user.id)
    )
    document = result.scalars().first()

    if not document:
        raise HTTPException(status_code=404, detail="Document not found")

    # Old documents only carry a summary in `content`; a full BlockNote
    # document cannot be recovered from it, so editing is unavailable.
    if not document.blocknote_document:
        raise HTTPException(
            status_code=400,
            detail="This document was uploaded before editing was enabled. Please re-upload to enable editing."
        )

    return {
        "document_id": document.id,
        "title": document.title,
        "blocknote_document": document.blocknote_document,
        "last_edited_at": document.last_edited_at.isoformat()
        if document.last_edited_at
        else None,
    }


@router.put("/documents/{document_id}/blocknote-content")
async def update_blocknote_content(
    document_id: int,
    data: dict[str, Any],
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
):
    """
    Auto-save the BlockNote document while the user is editing.

    Persists only the live BlockNote JSON state plus the last-edit
    timestamp; the searchable ``content`` field is left untouched.
    """
    # Join through SearchSpace so a user can only touch their own documents.
    lookup = (
        select(Document)
        .join(SearchSpace)
        .filter(Document.id == document_id, SearchSpace.user_id == user.id)
    )
    document = (await session.execute(lookup)).scalars().first()

    if not document:
        raise HTTPException(status_code=404, detail="Document not found")

    blocknote_document = data.get("blocknote_document")
    if not blocknote_document:
        raise HTTPException(status_code=400, detail="blocknote_document is required")

    # Only the editing state and its timestamp change during auto-save.
    document.blocknote_document = blocknote_document
    document.last_edited_at = datetime.now(UTC)

    await session.commit()
    await session.refresh(document)

    return {"status": "saved", "last_edited_at": document.last_edited_at.isoformat()}


# did not implement reindexing (for now)
# @router.post("/documents/{document_id}/finalize-edit")
# async def finalize_edit(
# document_id: int,
# session: AsyncSession = Depends(get_async_session),
# user: User = Depends(current_active_user),
# ):
# """
# Finalize document editing: convert BlockNote to markdown,
# update content (summary), and trigger reindexing.
# """
# result = await session.execute(
# select(Document)
# .join(SearchSpace)
# .filter(Document.id == document_id, SearchSpace.user_id == user.id)
# )
# document = result.scalars().first()

# if not document:
# raise HTTPException(status_code=404, detail="Document not found")

# if not document.blocknote_document:
# raise HTTPException(
# status_code=400,
# detail="Document has no BlockNote content to finalize"
# )

# # 1. Convert BlockNote JSON → Markdown
# full_markdown = await convert_blocknote_to_markdown(document.blocknote_document)

# if not full_markdown:
# raise HTTPException(
# status_code=500,
# detail="Failed to convert BlockNote document to markdown"
# )

# # 2. Generate new summary from full markdown
# from app.services.llm_service import get_user_long_context_llm
# from app.utils.document_converters import generate_document_summary

# user_llm = await get_user_long_context_llm(session, str(user.id), document.search_space_id)
# if not user_llm:
# raise HTTPException(
# status_code=500,
# detail="No LLM configured for summary generation"
# )

# document_metadata = document.document_metadata or {}
# summary_content, summary_embedding = await generate_document_summary(
# full_markdown, user_llm, document_metadata
# )

# # 3. Update document fields
# document.content = summary_content
# document.embedding = summary_embedding
# document.content_needs_reindexing = True # Trigger chunk regeneration
# document.last_edited_at = datetime.now(UTC)

# await session.commit()

# return {
# "status": "finalized",
# "message": "Document saved. Summary and chunks will be regenerated in the background.",
# "content_needs_reindexing": True,
# }

Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,16 @@ async def add_extension_received_document(

# Process chunks
chunks = await create_document_chunks(content.pageContent)

from app.utils.blocknote_converter import convert_markdown_to_blocknote

# Convert markdown to BlockNote JSON
blocknote_json = await convert_markdown_to_blocknote(combined_document_string)
if not blocknote_json:
logging.warning(
f"Failed to convert extension document '{content.metadata.VisitedWebPageTitle}' "
f"to BlockNote JSON, document will not be editable"
)

# Update or create document
if existing_document:
Expand All @@ -154,6 +164,7 @@ async def add_extension_received_document(
existing_document.embedding = summary_embedding
existing_document.document_metadata = content.metadata.model_dump()
existing_document.chunks = chunks
existing_document.blocknote_document = blocknote_json

await session.commit()
await session.refresh(existing_document)
Expand All @@ -170,6 +181,7 @@ async def add_extension_received_document(
chunks=chunks,
content_hash=content_hash,
unique_identifier_hash=unique_identifier_hash,
blocknote_document=blocknote_json,
)

session.add(document)
Expand Down
30 changes: 30 additions & 0 deletions surfsense_backend/app/tasks/document_processors/file_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,14 @@ async def add_received_file_document_using_unstructured(

# Process chunks
chunks = await create_document_chunks(file_in_markdown)

from app.utils.blocknote_converter import convert_markdown_to_blocknote

# Convert markdown to BlockNote JSON
blocknote_json = await convert_markdown_to_blocknote(file_in_markdown)
if not blocknote_json:
logging.warning(f"Failed to convert {file_name} to BlockNote JSON, document will not be editable")


# Update or create document
if existing_document:
Expand All @@ -112,6 +120,7 @@ async def add_received_file_document_using_unstructured(
"ETL_SERVICE": "UNSTRUCTURED",
}
existing_document.chunks = chunks
existing_document.blocknote_document = blocknote_json

await session.commit()
await session.refresh(existing_document)
Expand All @@ -131,6 +140,7 @@ async def add_received_file_document_using_unstructured(
chunks=chunks,
content_hash=content_hash,
unique_identifier_hash=unique_identifier_hash,
blocknote_document=blocknote_json,
)

session.add(document)
Expand Down Expand Up @@ -213,6 +223,14 @@ async def add_received_file_document_using_llamacloud(

# Process chunks
chunks = await create_document_chunks(file_in_markdown)

from app.utils.blocknote_converter import convert_markdown_to_blocknote

# Convert markdown to BlockNote JSON
blocknote_json = await convert_markdown_to_blocknote(file_in_markdown)
if not blocknote_json:
logging.warning(f"Failed to convert {file_name} to BlockNote JSON, document will not be editable")


# Update or create document
if existing_document:
Expand All @@ -226,6 +244,7 @@ async def add_received_file_document_using_llamacloud(
"ETL_SERVICE": "LLAMACLOUD",
}
existing_document.chunks = chunks
existing_document.blocknote_document = blocknote_json

await session.commit()
await session.refresh(existing_document)
Expand All @@ -245,6 +264,7 @@ async def add_received_file_document_using_llamacloud(
chunks=chunks,
content_hash=content_hash,
unique_identifier_hash=unique_identifier_hash,
blocknote_document=blocknote_json,
)

session.add(document)
Expand Down Expand Up @@ -352,6 +372,14 @@ async def add_received_file_document_using_docling(

# Process chunks
chunks = await create_document_chunks(file_in_markdown)

from app.utils.blocknote_converter import convert_markdown_to_blocknote

# Convert markdown to BlockNote JSON
blocknote_json = await convert_markdown_to_blocknote(file_in_markdown)
if not blocknote_json:
logging.warning(f"Failed to convert {file_name} to BlockNote JSON, document will not be editable")


# Update or create document
if existing_document:
Expand All @@ -365,6 +393,7 @@ async def add_received_file_document_using_docling(
"ETL_SERVICE": "DOCLING",
}
existing_document.chunks = chunks
existing_document.blocknote_document = blocknote_json

await session.commit()
await session.refresh(existing_document)
Expand All @@ -384,6 +413,7 @@ async def add_received_file_document_using_docling(
chunks=chunks,
content_hash=content_hash,
unique_identifier_hash=unique_identifier_hash,
blocknote_document=blocknote_json,
)

session.add(document)
Expand Down
Loading
Loading