Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions backend/database/conversations.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,56 @@ def get_conversations_without_photos(
return conversations


def get_conversations_lite(
    uid: str,
    limit: int = 100,
    offset: int = 0,
    include_discarded: bool = False,
    statuses: Optional[List[str]] = None,
    start_date: Optional[datetime] = None,
    end_date: Optional[datetime] = None,
    categories: Optional[List[str]] = None,
    folder_id: Optional[str] = None,
    starred: Optional[bool] = None,
):
    """
    Lightweight conversation listing: no photo subcollection queries, no transcript
    decryption/decompression. Returns dicts with transcript_segments=[] and photos=[].

    Args:
        uid: Owner user id; scopes the query to that user's conversations subcollection.
        limit, offset: Pagination window, applied after ordering by created_at DESC.
        include_discarded: When False, adds a discarded == False filter.
        statuses: Optional status whitelist (Firestore 'in' filter). None or [] means
            "no status filter".
        start_date, end_date: Inclusive created_at range bounds; either may be None.
        categories: Optional structured.category whitelist ('in' filter).
        folder_id: Restrict results to one folder when set.
        starred: Tri-state; None means "don't filter on starred" (so starred=False
            still filters).

    NOTE(review): statuses and categories each use a Firestore 'in' filter; older
    Firestore limits allowed only one 'in' per query — confirm the deployed client/
    server accepts both together, or that callers never combine them.
    """
    # Normalize to an empty list; the previous signature used a mutable default
    # (`statuses: List[str] = []`), which is a shared-object pitfall in Python.
    statuses = statuses or []

    conversations_ref = db.collection('users').document(uid).collection(conversations_collection)
    if not include_discarded:
        conversations_ref = conversations_ref.where(filter=FieldFilter('discarded', '==', False))
    if statuses:
        conversations_ref = conversations_ref.where(filter=FieldFilter('status', 'in', statuses))

    if categories:
        conversations_ref = conversations_ref.where(filter=FieldFilter('structured.category', 'in', categories))

    if folder_id:
        conversations_ref = conversations_ref.where(filter=FieldFilter('folder_id', '==', folder_id))

    if starred is not None:
        conversations_ref = conversations_ref.where(filter=FieldFilter('starred', '==', starred))

    if start_date:
        conversations_ref = conversations_ref.where(filter=FieldFilter('created_at', '>=', start_date))
    if end_date:
        conversations_ref = conversations_ref.where(filter=FieldFilter('created_at', '<=', end_date))

    # Newest first, then apply the pagination window.
    conversations_ref = conversations_ref.order_by('created_at', direction=firestore.Query.DESCENDING)
    conversations_ref = conversations_ref.limit(limit).offset(offset)

    conversations = []
    for doc in conversations_ref.stream():
        data = doc.to_dict()
        # Strip heavy fields that are not needed for list views.
        data['transcript_segments'] = []
        data.pop('transcript_segments_compressed', None)
        data['photos'] = []
        conversations.append(data)
    return conversations


def iter_all_conversations(uid: str, batch_size: int = 400, include_discarded: bool = True):
"""Yield all conversations for a user, decrypted, in batches. Used for streaming data export."""
conversations_ref = db.collection('users').document(uid).collection(conversations_collection)
Expand Down
37 changes: 26 additions & 11 deletions backend/routers/conversations.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,17 +133,32 @@ def get_conversations(
if len(statuses) == 0:
statuses = "processing,completed"

conversations = conversations_db.get_conversations_without_photos(
uid,
limit,
offset,
include_discarded=include_discarded,
statuses=statuses.split(",") if len(statuses) > 0 else [],
start_date=start_date,
end_date=end_date,
folder_id=folder_id,
starred=starred,
)
# limit=1 calls (e.g. in-progress recovery) need full hydration (transcript, photos).
# Only skip the expensive N+1 photo/transcript work for multi-item list fetches.
if limit > 1:
conversations = conversations_db.get_conversations_lite(
uid,
limit,
offset,
include_discarded=include_discarded,
statuses=statuses.split(",") if len(statuses) > 0 else [],
start_date=start_date,
end_date=end_date,
folder_id=folder_id,
starred=starred,
)
else:
conversations = conversations_db.get_conversations(
uid,
limit,
offset,
include_discarded=include_discarded,
statuses=statuses.split(",") if len(statuses) > 0 else [],
start_date=start_date,
end_date=end_date,
folder_id=folder_id,
starred=starred,
)

for conv in conversations:
if conv.get('is_locked', False):
Expand Down
38 changes: 23 additions & 15 deletions backend/routers/developer.py
Original file line number Diff line number Diff line change
Expand Up @@ -748,25 +748,33 @@ def get_conversations(
except ValueError as e:
raise HTTPException(status_code=400, detail=f"Invalid category {str(e)}")

conversations = conversations_db.get_conversations(
uid,
limit,
offset,
include_discarded=False,
statuses=["completed"],
start_date=start_date,
end_date=end_date,
categories=[c.value for c in category_list],
)
if include_transcript:
conversations = conversations_db.get_conversations(
uid,
limit,
offset,
include_discarded=False,
statuses=["completed"],
start_date=start_date,
end_date=end_date,
categories=[c.value for c in category_list],
)
else:
conversations = conversations_db.get_conversations_lite(
uid,
limit,
offset,
include_discarded=False,
statuses=["completed"],
start_date=start_date,
end_date=end_date,
categories=[c.value for c in category_list],
)

# Filter out locked conversations completely
unlocked_conversations = [conv for conv in conversations if not conv.get('is_locked', False)]

# Remove transcript_segments if not requested
if not include_transcript:
for conv in unlocked_conversations:
conv.pop('transcript_segments', None)
else:
if include_transcript:
_add_speaker_names_to_segments(uid, unlocked_conversations)

return unlocked_conversations
Expand Down
2 changes: 1 addition & 1 deletion backend/routers/mcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def get_conversations(
except ValueError as e:
raise HTTPException(status_code=400, detail=f"Invalid category {str(e)}")

conversations = conversations_db.get_conversations(
conversations = conversations_db.get_conversations_lite(
uid,
limit,
offset,
Expand Down
2 changes: 1 addition & 1 deletion backend/routers/mcp_sse.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ def execute_tool(user_id: str, tool_name: str, arguments: dict) -> dict:
except ValueError:
pass

conversations = conversations_db.get_conversations(
conversations = conversations_db.get_conversations_lite(
user_id,
limit,
offset,
Expand Down
3 changes: 3 additions & 0 deletions backend/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,6 @@ pytest tests/unit/test_pusher_private_cloud_data_protection.py -v
pytest tests/unit/test_storage_upload_audio_chunk_data_protection.py -v
pytest tests/unit/test_people_conversations_500s.py -v
pytest tests/unit/test_firestore_read_ops_cache.py -v
pytest tests/unit/test_conversations_lite_db.py -v
pytest tests/unit/test_conversations_router_branching.py -v
pytest tests/unit/test_mcp_and_developer_conversations_lite.py -v
163 changes: 163 additions & 0 deletions backend/tests/unit/test_conversations_lite_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
"""
Tests for get_conversations_lite() in database/conversations.py.
Verifies that the lite function strips heavy fields and applies all filters.
Imports the REAL production function via importlib to avoid Firestore init.
"""

import importlib.util
import os
import sys
import types
from unittest.mock import MagicMock

# Provide a deterministic test secret before the real database/conversations.py
# is exec'd below, without clobbering a value already set in the environment.
# NOTE(review): presumably read at import time by the encryption utilities that
# module pulls in — confirm nothing here touches real encrypted data.
os.environ.setdefault(
    "ENCRYPTION_SECRET",
    "omi_ZwB2ZNqB2HHpMK6wStk7sTpavJiPTFg7gXUHnc4tFABPU6pZ2c2DKgehtfgi4RZv",
)


def _ensure_mock_module(name: str):
"""Ensure a MagicMock module exists in sys.modules (supports 'from X import Y')."""
if name not in sys.modules:
mod = MagicMock()
mod.__path__ = []
mod.__name__ = name
mod.__loader__ = None
mod.__spec__ = None
mod.__package__ = name if '.' not in name else name.rsplit('.', 1)[0]
sys.modules[name] = mod
return sys.modules[name]


# Stub database._client with a shared MagicMock db BEFORE database.conversations
# can import it; the tests below assert against query chains hung off mock_db.
mock_db = MagicMock()

# Make 'database' look like a package so submodules can register under it
# (preserving any __path__ a previously-run test file already attached).
_ensure_mock_module("database")
sys.modules["database"].__path__ = getattr(sys.modules["database"], '__path__', [])

client_stub = _ensure_mock_module("database._client")
client_stub.db = mock_db
client_stub.document_id_from_seed = MagicMock(return_value="doc-id")

# Stub database.users; database.helpers needs real decorators, handled below.
_ensure_mock_module("database.users")


def _passthrough(*args, **kwargs):
"""No-op decorator factory: @decorator(...) returns identity decorator."""
return lambda f: f


# database.helpers must be a REAL module exposing working decorator factories:
# database.conversations applies these decorators at import time, so a plain
# MagicMock would wrap every function in a mock and break the code under test.
helpers_mod = types.ModuleType("database.helpers")
helpers_mod.set_data_protection_level = _passthrough
helpers_mod.prepare_for_write = _passthrough
helpers_mod.prepare_for_read = _passthrough
helpers_mod.with_photos = _passthrough
sys.modules["database.helpers"] = helpers_mod

# Stub the utils modules (MagicMock, so 'from X import Y' resolves Y to a
# mock attribute rather than raising ImportError).
for name in [
    "utils",
    "utils.other",
    "utils.other.hume",
    "utils.other.storage",
    "utils.encryption",
]:
    _ensure_mock_module(name)

# Load the REAL database/conversations.py via importlib so the function under
# test is production code (not a copy), with the stubs above satisfying its imports.
_conv_path = os.path.join(os.path.dirname(__file__), '..', '..', 'database', 'conversations.py')
_conv_path = os.path.abspath(_conv_path)

# Remove any stale entry (e.g. a MagicMock left behind by another test file)
# so the module registered below is the real one.
if "database.conversations" in sys.modules:
    del sys.modules["database.conversations"]

spec = importlib.util.spec_from_file_location("database.conversations", _conv_path)
conv_module = importlib.util.module_from_spec(spec)
# Register BEFORE exec_module (standard importlib recipe) so imports that run
# during execution can resolve the partially-initialized module.
sys.modules["database.conversations"] = conv_module
spec.loader.exec_module(conv_module)

get_conversations_lite = conv_module.get_conversations_lite


def _make_fake_doc(data: dict):
doc = MagicMock()
doc.to_dict.return_value = data
return doc


def _setup_mock_ref():
    """Build a self-chaining query mock and install it as the user's
    conversations collection on the module-level mock_db."""
    query = MagicMock()
    # Each query-builder method returns the same mock, mimicking Firestore's
    # fluent chaining (where().order_by().limit().offset()).
    for chained in ('where', 'order_by', 'limit', 'offset'):
        getattr(query, chained).return_value = query
    users_doc = mock_db.collection.return_value.document.return_value
    users_doc.collection.return_value = query
    return query


class TestGetConversationsLite:
    def test_strips_heavy_fields(self):
        """transcript_segments=[], photos=[], transcript_segments_compressed removed."""
        query = _setup_mock_ref()
        stored = {
            'id': 'conv_1',
            'structured': {'title': 'Test', 'overview': 'Overview', 'category': 'personal'},
            'transcript_segments': [{'text': 'secret data', 'speaker_id': 0, 'start': 0.0, 'end': 1.0}],
            'transcript_segments_compressed': b'\x00\x01\x02',
            'photos': [{'url': 'https://example.com/photo.jpg'}],
            'discarded': False,
            'status': 'completed',
            'created_at': None,
        }
        query.stream.return_value = [_make_fake_doc(dict(stored))]

        rows = get_conversations_lite('uid_1', limit=10, offset=0)

        assert len(rows) == 1
        row = rows[0]
        assert row['transcript_segments'] == []
        assert row['photos'] == []
        assert 'transcript_segments_compressed' not in row
        assert row['id'] == 'conv_1'
        assert row['structured']['title'] == 'Test'

    def test_applies_all_filters(self):
        """All filter parameters are forwarded to Firestore queries."""
        from datetime import datetime

        query = _setup_mock_ref()
        query.stream.return_value = []

        window_start = datetime(2026, 1, 1)
        window_end = datetime(2026, 1, 31)

        get_conversations_lite(
            'uid_1',
            limit=25,
            offset=10,
            include_discarded=False,
            statuses=['completed'],
            start_date=window_start,
            end_date=window_end,
            categories=['personal'],
            folder_id='folder_abc',
            starred=True,
        )

        # One where() each for: discarded, status, category, folder, starred,
        # start date, end date.
        assert len(query.where.call_args_list) == 7
        query.order_by.assert_called_once()
        query.limit.assert_called_once_with(25)
        query.offset.assert_called_once_with(10)

    def test_include_discarded_true_skips_filter(self):
        """When include_discarded=True, the discarded filter is NOT applied."""
        query = _setup_mock_ref()
        query.stream.return_value = []

        get_conversations_lite('uid_1', include_discarded=True)

        assert query.where.call_count == 0
Loading