Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ logs: ## display app-dev logs (follow mode)
.PHONY: logs

run-backend: ## Start only the backend application and all needed services
@$(COMPOSE) up --force-recreate -d docspec
@$(COMPOSE) up --force-recreate -d celery-dev
@$(COMPOSE) up --force-recreate -d y-provider-development
@$(COMPOSE) up --force-recreate -d nginx
Expand Down
5 changes: 5 additions & 0 deletions compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -217,3 +217,8 @@ services:
kc_postgresql:
condition: service_healthy
restart: true

docspec:
image: ghcr.io/docspecio/api:2.0.0
ports:
- "4000:4000"
1 change: 1 addition & 0 deletions docs/env.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ These are the environment variables you can set for the `impress-backend` contai
| USER_OIDC_ESSENTIAL_CLAIMS | Essential claims in OIDC token | [] |
| Y_PROVIDER_API_BASE_URL | Y Provider url | |
| Y_PROVIDER_API_KEY | Y provider API key | |
| DOCSPEC_API_URL | URL to endpoint of DocSpec conversion API | |


## impress-frontend image
Expand Down
4 changes: 3 additions & 1 deletion env.d/development/common
Original file line number Diff line number Diff line change
Expand Up @@ -67,5 +67,7 @@ DJANGO_SERVER_TO_SERVER_API_TOKENS=server-api-token
Y_PROVIDER_API_BASE_URL=http://y-provider-development:4444/api/
Y_PROVIDER_API_KEY=yprovider-api-key

DOCSPEC_API_URL=http://docspec:4000/conversion

# Theme customization
THEME_CUSTOMIZATION_CACHE_TIMEOUT=15
THEME_CUSTOMIZATION_CACHE_TIMEOUT=15
2 changes: 1 addition & 1 deletion env.d/development/common.e2e
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ Y_PROVIDER_API_BASE_URL=http://y-provider:4444/api/

# Throttle
API_DOCUMENT_THROTTLE_RATE=1000/min
API_CONFIG_THROTTLE_RATE=1000/min
API_CONFIG_THROTTLE_RATE=1000/min
11 changes: 9 additions & 2 deletions src/backend/core/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,15 @@
from django.utils.text import slugify
from django.utils.translation import gettext_lazy as _

from core.services import mime_types
import magic
from rest_framework import serializers

from core import choices, enums, models, utils, validators
from core.services.ai_services import AI_ACTIONS
from core.services.converter_services import (
ConversionError,
YdocConverter,
Converter,
)


Expand Down Expand Up @@ -188,6 +189,7 @@ class DocumentSerializer(ListDocumentSerializer):

content = serializers.CharField(required=False)
websocket = serializers.BooleanField(required=False, write_only=True)
file = serializers.FileField(required=False, write_only=True, allow_null=True)

class Meta:
model = models.Document
Expand All @@ -204,6 +206,7 @@ class Meta:
"deleted_at",
"depth",
"excerpt",
"file",
"is_favorite",
"link_role",
"link_reach",
Expand Down Expand Up @@ -461,7 +464,11 @@ def create(self, validated_data):
language = user.language or language

try:
document_content = YdocConverter().convert(validated_data["content"])
document_content = Converter().convert(
validated_data["content"],
mime_types.MARKDOWN,
mime_types.YJS
)
except ConversionError as err:
raise serializers.ValidationError(
{"content": ["Could not convert content"]}
Expand Down
40 changes: 30 additions & 10 deletions src/backend/core/api/viewsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,12 @@
from core.services.ai_services import AIService
from core.services.collaboration_services import CollaborationService
from core.services.converter_services import (
ConversionError,
ServiceUnavailableError as YProviderServiceUnavailableError,
)
from core.services.converter_services import (
ValidationError as YProviderValidationError,
Converter,
)
from core.services.converter_services import (
YdocConverter,
)
from core.services import mime_types
from core.tasks.mail import send_ask_for_access_mail
from core.utils import extract_attachments, filter_descendants

Expand Down Expand Up @@ -504,6 +502,28 @@ def perform_create(self, serializer):
"IN SHARE ROW EXCLUSIVE MODE;"
)

# Remove file from validated_data as it's not a model field
# Process it if present
uploaded_file = serializer.validated_data.pop("file", None)

# If a file is uploaded, convert it to Yjs format and set as content
if uploaded_file:
try:
file_content = uploaded_file.read()

converter = Converter()
converted_content = converter.convert(
file_content,
content_type=uploaded_file.content_type,
accept=mime_types.YJS
)
serializer.validated_data["content"] = converted_content
serializer.validated_data["title"] = uploaded_file.name
except ConversionError as err:
raise drf.exceptions.ValidationError(
{"file": ["Could not convert file content"]}
) from err

obj = models.Document.add_root(
creator=self.request.user,
**serializer.validated_data,
Expand Down Expand Up @@ -1603,14 +1623,14 @@ def content(self, request, pk=None):
if base64_content is not None:
# Convert using the y-provider service
try:
yprovider = YdocConverter()
yprovider = Converter()
result = yprovider.convert(
base64.b64decode(base64_content),
"application/vnd.yjs.doc",
mime_types.YJS,
{
"markdown": "text/markdown",
"html": "text/html",
"json": "application/json",
"markdown": mime_types.MARKDOWN,
"html": mime_types.HTML,
"json": mime_types.JSON,
}[content_format],
)
content = result
Expand Down
71 changes: 65 additions & 6 deletions src/backend/core/services/converter_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
from django.conf import settings

import requests
import typing

from core.services import mime_types

class ConversionError(Exception):
"""Base exception for conversion-related errors."""
Expand All @@ -19,8 +21,65 @@ class ServiceUnavailableError(ConversionError):
"""Raised when the conversion service is unavailable."""


class ConverterProtocol(typing.Protocol):
    """Structural interface that every conversion backend must satisfy."""

    def convert(self, text, content_type, accept):
        """Convert ``text`` from the ``content_type`` MIME type to ``accept``."""


class Converter:
docspec: ConverterProtocol
ydoc: ConverterProtocol

def __init__(self):
    """Create one backend instance per conversion service."""
    # DocSpec backend first, then the YDoc backend (same order as before).
    self.docspec, self.ydoc = DocSpecConverter(), YdocConverter()

def convert(self, input, content_type, accept):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we be consistent across all the `convert` methods? In this one the first parameter is called `input`, in the `DocSpecConverter` class it is called `data`, and in the `YdocConverter` class it is called `text`. `data` looks like a good choice.

"""Convert input into other formats using external microservices."""

if content_type == mime_types.DOCX and accept == mime_types.YJS:
return self.convert(
self.docspec.convert(input, mime_types.DOCX, mime_types.BLOCKNOTE),
mime_types.BLOCKNOTE,
mime_types.YJS
)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To make this easier to read and understand, could you break it down into multiple calls and avoid the recursion?

data = self.docspec.convert(input, mime_types.DOCX, mime_types.BLOCKNOTE)
return self.ydoc.convert(data, mime_types.BLOCKNOTE, mime_types.YJS)


return self.ydoc.convert(input, content_type, accept)


class DocSpecConverter:
"""Service class for DocSpec conversion-related operations."""

def _request(self, url, data, content_type):
"""Make a request to the DocSpec API."""

response = requests.post(
url,
headers={"Accept": mime_types.BLOCKNOTE},
files={"file": ("document.docx", data, content_type)},
timeout=settings.CONVERSION_API_TIMEOUT,
verify=settings.CONVERSION_API_SECURE,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see this setting defined in the settings.py module.
I think in the Production class it should be set to True, without the ability to modify it (don't use django configuration in this case).

)
response.raise_for_status()
return response

def convert(self, data, content_type, accept):
    """Convert a DOCX document to BlockNote through the DocSpec API.

    Args:
        data: Raw document payload to upload; must be truthy.
        content_type: MIME type of ``data``; only ``mime_types.DOCX`` is
            accepted.
        accept: Requested output MIME type; only ``mime_types.BLOCKNOTE``
            is accepted.

    Returns:
        The body of the DocSpec response (``response.content``).

    Raises:
        ValidationError: If ``data`` is empty or the requested conversion
            pair is not DOCX -> BlockNote.
        ServiceUnavailableError: If the HTTP call to DocSpec fails.
    """
    if not data:
        raise ValidationError("Input data cannot be empty")

    supported = content_type == mime_types.DOCX and accept == mime_types.BLOCKNOTE
    if not supported:
        raise ValidationError(
            f"Conversion from {content_type} to {accept} is not supported."
        )

    try:
        response = self._request(settings.DOCSPEC_API_URL, data, content_type)
        return response.content
    except requests.RequestException as err:
        # Covers connection failures as well as the HTTP errors raised by
        # raise_for_status() inside _request.
        raise ServiceUnavailableError(
            "Failed to connect to DocSpec conversion service",
        ) from err


class YdocConverter:
"""Service class for conversion-related operations."""
"""Service class for YDoc conversion-related operations."""

@property
def auth_header(self):
Expand All @@ -45,7 +104,7 @@ def _request(self, url, data, content_type, accept):
return response

def convert(
self, text, content_type="text/markdown", accept="application/vnd.yjs.doc"
self, text, content_type=mime_types.MARKDOWN, accept=mime_types.YJS
):
"""Convert a Markdown text into our internal format using an external microservice."""

Expand All @@ -59,14 +118,14 @@ def convert(
content_type,
accept,
)
if accept == "application/vnd.yjs.doc":
if accept == mime_types.YJS:
return b64encode(response.content).decode("utf-8")
if accept in {"text/markdown", "text/html"}:
if accept in {mime_types.MARKDOWN, "text/html"}:
return response.text
if accept == "application/json":
if accept == mime_types.JSON:
return response.json()
raise ValidationError("Unsupported format")
except requests.RequestException as err:
raise ServiceUnavailableError(
"Failed to connect to conversion service",
f"Failed to connect to YDoc conversion service {content_type}, {accept}",
) from err
6 changes: 6 additions & 0 deletions src/backend/core/services/mime_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""MIME type constants shared by the conversion services."""

# BlockNote JSON document; sent as the Accept header to the DocSpec API.
BLOCKNOTE = "application/vnd.blocknote+json"
# Yjs document; the conversion target used when creating documents.
YJS = "application/vnd.yjs.doc"
MARKDOWN = "text/markdown"
JSON = "application/json"
# Office Open XML word-processing document (.docx).
DOCX = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
HTML = "text/html"
6 changes: 6 additions & 0 deletions src/backend/impress/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -680,6 +680,12 @@ class Base(Configuration):
environ_prefix=None,
)

# DocSpec API microservice
DOCSPEC_API_URL = values.Value(
environ_name="DOCSPEC_API_URL",
environ_prefix=None
)

# Conversion endpoint
CONVERSION_API_ENDPOINT = values.Value(
default="convert",
Expand Down
Loading