-
Notifications
You must be signed in to change notification settings - Fork 8
Open
Labels
Description
Metadata
- File(s):
frappe_gmail_thread/api/activity.py:28-63 - Category: Database / API
- Severity: High
- Effort to Fix: Medium
- Estimated Performance Gain: 60-80%
Problem Description
The get_linked_gmail_threads() API endpoint is called from the timeline of any document linked to Gmail threads. It exhibits a severe N+1 query pattern:
- Fetches all Gmail Thread names linked to a document
- Loads the full document for each thread with
get_doc() - For each email in each thread, calls
get_attachments_data()which does aget_value()per attachment
For a document with 5 linked threads, each containing 10 emails with 2 attachments each, this results in:
- 1 initial query (get thread names)
- 5
get_doc()calls (full document loads with child tables) - 100
get_value()calls for attachment URLs (5 threads × 10 emails × 2 attachments) - Total: 106 queries for a single timeline load
Code Location
Main API endpoint (lines 18-82):
@frappe.whitelist()
def get_linked_gmail_threads(doctype, docname):
gmail_threads = frappe.get_all(
"Gmail Thread",
filters={
"reference_doctype": doctype,
"reference_name": docname,
},
)
data = []
for thread in gmail_threads:
thread = frappe.get_doc("Gmail Thread", thread.name) # N+1: Full doc per thread
for email in thread.emails:
t_data = {
# ... build timeline data ...
"attachments": get_attachments_data(email), # N+1: Query per attachment
# ...
}
data.append(t_data)
return dataAttachment lookup (lines 7-15):
def get_attachments_data(email):
attachments_data = json.loads(email.attachments_data)
for attachment in attachments_data:
file_doc_name = attachment.get("file_doc_name")
if file_doc_name:
file_url = frappe.db.get_value("File", file_doc_name, "file_url") # Query per attachment
attachment["file_url"] = file_url
return attachments_dataRoot Cause
- Using
get_all()to get names thenget_doc()in a loop instead of fetching data in a single query - Loading full documents when only specific fields are needed
- Nested loop for attachments with individual
get_value()calls - This API is called on every timeline view, making it a hot path
Proposed Solution
Batch fetch all data upfront and process in memory:
import json
import frappe
def get_attachments_data_batch(emails):
"""Batch fetch attachment URLs for all emails at once."""
all_file_names = []
email_attachments_map = {}
for email in emails:
attachments_data = json.loads(email.attachments_data or "[]")
email_attachments_map[email.name] = attachments_data
for att in attachments_data:
if att.get("file_doc_name"):
all_file_names.append(att["file_doc_name"])
# Single query for all file URLs
if all_file_names:
file_urls = frappe.get_all(
"File",
filters={"name": ["in", all_file_names]},
fields=["name", "file_url"]
)
file_url_map = {f.name: f.file_url for f in file_urls}
# Update attachments with URLs
for email_name, attachments in email_attachments_map.items():
for att in attachments:
file_name = att.get("file_doc_name")
if file_name and file_name in file_url_map:
att["file_url"] = file_url_map[file_name]
return email_attachments_map
@frappe.whitelist()
def get_linked_gmail_threads(doctype, docname):
# Fetch threads with needed fields in single query
gmail_threads = frappe.get_all(
"Gmail Thread",
filters={
"reference_doctype": doctype,
"reference_name": docname,
},
fields=["name", "reference_doctype", "reference_name", "_liked_by"]
)
if not gmail_threads:
return []
thread_names = [t.name for t in gmail_threads]
thread_map = {t.name: t for t in gmail_threads}
# Batch fetch all emails for all threads
all_emails = frappe.get_all(
"Single Email CT", # Child table DocType
filters={"parent": ["in", thread_names]},
fields=[
"name", "parent", "creation", "subject", "content", "sender",
"sender_full_name", "cc", "bcc", "recipients", "sent_or_received",
"read_by_recipient", "date_and_time", "attachments_data"
],
order_by="creation asc"
)
# Batch fetch all attachment URLs
attachments_map = get_attachments_data_batch(all_emails)
data = []
for email in all_emails:
thread = thread_map[email.parent]
t_data = {
"icon": "mail",
"icon_size": "sm",
"creation": email.creation,
"is_card": True,
"doctype": "Gmail Thread",
"id": f"gmail-thread-{thread.name}",
"template": "timeline_message_box",
"owner": email.sender,
"template_data": {
"doc": {
"name": thread.name,
"communication_type": "Gmail Thread",
"communication_medium": "Email",
"comment_type": "",
"communication_date": email.creation,
"content": email.content,
"sender": email.sender,
"sender_full_name": email.sender_full_name,
"cc": email.cc,
"bcc": email.bcc,
"creation": email.creation,
"subject": email.subject,
"delivery_status": "Sent" if email.sent_or_received == "Sent" else "Received",
"_liked_by": thread._liked_by,
"reference_doctype": thread.reference_doctype,
"reference_name": thread.reference_name,
"read_by_recipient": email.read_by_recipient,
"rating": 0,
"recipients": email.recipients,
"attachments": attachments_map.get(email.name, []),
"_url": f"/app/gmail-thread/{thread.name}",
"_doc_status": "Sent" if email.sent_or_received == "Sent" else "Received",
"_doc_status_indicator": "green" if email.sent_or_received == "Sent" else "blue",
"owner": email.sender,
"user_full_name": email.sender_full_name,
}
},
"name": thread.name,
"delivery_status": "Sent" if email.sent_or_received == "Sent" else "Received",
}
data.append(t_data)
return dataImplementation Steps
- Create
get_attachments_data_batch()function to batch fetch file URLs - Refactor
get_linked_gmail_threads()to:- Fetch threads with required fields in initial query
- Batch fetch all child emails in one query
- Use batch attachment lookup
- Replace
thread.get_url()with direct URL construction (avoids loading controller) - Test with documents that have multiple linked threads
- Add API response caching if needed (using
frappe.cache())
Additional Notes
- This API is called via the
additional_timeline_contenthook on EVERY DocType (see Issue 4) - Combined with Issue 4, this creates performance impact across the entire application
- Consider adding pagination if documents can have many linked threads
- The child table is
Single Email CT- verify the exact DocType name in implementation
Reactions are currently unavailable