Skip to content

Commit ebb8965

Browse files
✨ Add extracted_text field to Document model and implement text extraction test
1 parent: 26bd7dd · commit: ebb8965

File tree

2 files changed: 30 additions and 2 deletions.

2 files changed

+30
-2
lines changed

backend/app/models.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from pydantic import EmailStr
44
from sqlmodel import Field, Relationship, SQLModel
5-
5+
from sqlalchemy import Column, Text
66

77
# Shared properties
88
class UserBase(SQLModel):
@@ -81,7 +81,7 @@ class Document(DocumentBase, table=True):
8181
foreign_key="user.id", nullable=False, ondelete="CASCADE"
8282
)
8383
owner: User | None = Relationship(back_populates="documents")
84-
84+
extracted_text: str | None = Field(default=None, sa_column=Column(Text, nullable=True))
8585

8686
# Properties to return via API, id is always required
8787
# class DocumentPublic(DocumentBase):
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
from unittest.mock import MagicMock, patch
2+
from app.core.extractors import extract_text_and_save_to_db
3+
from app.models import Document
4+
5+
def test_extract_text_and_save_to_db_success():
    """Check the happy path of ``extract_text_and_save_to_db``.

    The extractor and the ``Session`` class are both patched inside
    ``app.core.extractors``. The test then verifies that the text returned
    by the extractor ends up on the document's ``extracted_text`` attribute
    and that the session is committed exactly once.
    """
    s3_url = "s3://bucket/path/to/file.pdf"
    doc_id = "123e4567-e89b-12d3-a456-426614174000"
    expected_text = "Extracted text content"

    # Document instance the mocked query chain will hand back.
    document = Document(id=doc_id)

    # Stand-in session: query(...).filter(...).first() yields the document.
    db_session = MagicMock()
    db_session.query.return_value.filter.return_value.first.return_value = document

    with patch(
        "app.core.extractors.extract_text_from_file",
        return_value=expected_text,
    ) as extractor_patch:
        with patch("app.core.extractors.Session") as session_cls_patch:
            # The code under test is expected to use ``with Session(...) as s``,
            # so the stand-in session is exposed through ``__enter__``.
            session_cls_patch.return_value.__enter__.return_value = db_session

            extract_text_and_save_to_db(s3_url, doc_id)

            # The extractor received the S3 URL and nothing else.
            extractor_patch.assert_called_once_with(s3_url)
            # Exactly one lookup was made through the session.
            db_session.query.assert_called_once()
            # The extracted text was written onto the document and committed.
            assert document.extracted_text == expected_text
            db_session.commit.assert_called_once()

0 commit comments

Comments
 (0)