From be410143f17d6e783f1a8bedfbbaa415cb56c739 Mon Sep 17 00:00:00 2001 From: mustafawohhlig123123 Date: Fri, 8 Aug 2025 12:39:58 +0530 Subject: [PATCH] attachment lsting and reading functionality added --- gmail/gmail_tools.py | 897 ++++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 5 + requirements.txt | 186 +++++++++ 3 files changed, 1087 insertions(+), 1 deletion(-) create mode 100644 requirements.txt diff --git a/gmail/gmail_tools.py b/gmail/gmail_tools.py index 0e634b9b..b6f79c2b 100644 --- a/gmail/gmail_tools.py +++ b/gmail/gmail_tools.py @@ -8,7 +8,10 @@ import asyncio import base64 import ssl -from typing import Optional, List, Dict, Literal +from typing import Optional, List, Dict, Literal , Any , Tuple +from pathlib import Path +import json +import io from email.mime.text import MIMEText @@ -26,10 +29,85 @@ logger = logging.getLogger(__name__) +try: + import pdfplumber + PDF_AVAILABLE = True + PDF_LIBRARY = "pdfplumber" +except ImportError: + PDF_AVAILABLE = False + PDF_LIBRARY = None + +try: + import mammoth + DOCX_AVAILABLE = True + DOCX_LIBRARY = "mammoth" +except ImportError: + DOCX_AVAILABLE = False + DOCX_LIBRARY = None + +try: + import pyxlsb + import pandas as pd + EXCEL_AVAILABLE = True + EXCEL_LIBRARY = "pyxlsb" +except ImportError: + EXCEL_AVAILABLE = False + EXCEL_LIBRARY = None + +try: + from bs4 import BeautifulSoup + HTML_AVAILABLE = True +except ImportError: + HTML_AVAILABLE = False + +try: + import csv + CSV_AVAILABLE = True +except ImportError: + CSV_AVAILABLE = False + + + GMAIL_BATCH_SIZE = 25 GMAIL_REQUEST_DELAY = 0.1 + +def _extract_message_body(payload): + """ + Helper function to extract plain text body from a Gmail message payload. 
+ + Args: + payload (dict): The message payload from Gmail API + + Returns: + str: The plain text body content, or empty string if not found + """ + body_data = "" + parts = [payload] if "parts" not in payload else payload.get("parts", []) + + part_queue = list(parts) # Use a queue for BFS traversal of parts + while part_queue: + part = part_queue.pop(0) + if part.get("mimeType") == "text/plain" and part.get("body", {}).get("data"): + data = base64.urlsafe_b64decode(part["body"]["data"]) + body_data = data.decode("utf-8", errors="ignore") + break # Found plain text body + elif part.get("mimeType", "").startswith("multipart/") and "parts" in part: + part_queue.extend(part.get("parts", [])) # Add sub-parts to the queue + + # If no plain text found, check the main payload body if it exists + if ( + not body_data + and payload.get("mimeType") == "text/plain" + and payload.get("body", {}).get("data") + ): + data = base64.urlsafe_b64decode(payload["body"]["data"]) + body_data = data.decode("utf-8", errors="ignore") + + return body_data + + def _extract_message_body(payload): """ Helper function to extract plain text body from a Gmail message payload. 
def _extract_attachments(payload: Dict, message_id: str) -> List[Dict[str, Any]]:
    """
    Collect attachment metadata from a Gmail message payload.

    Recursively walks the MIME part tree and records every part that carries
    both a filename and an ``attachmentId``.

    Args:
        payload: The ``payload`` object from a Gmail API message.
        message_id: ID of the message the payload belongs to.

    Returns:
        List of dicts with ``attachment_id``, ``filename``, ``mime_type``,
        ``size`` and ``message_id`` keys.
    """
    attachments = []

    def extract_from_part(part):
        filename = part.get('filename', '')
        body = part.get('body', {})

        # A part is an attachment when it has both a filename and an ID.
        if filename and body.get('attachmentId'):
            attachments.append({
                'attachment_id': body.get('attachmentId'),
                'filename': filename,
                'mime_type': part.get('mimeType', ''),
                'size': body.get('size', 0),
                'message_id': message_id
            })

        for subpart in part.get('parts', []):
            extract_from_part(subpart)

    extract_from_part(payload)
    return attachments


def _extract_body_content(payload: Dict) -> Tuple[str, str]:
    """
    Extract the text/plain and text/html bodies from a message payload.

    Returns:
        Tuple ``(text_content, html_content)``; either may be "" when the
        corresponding part is absent. If multiple parts of the same type
        exist, the last one encountered wins.
    """
    text_content = ""
    html_content = ""

    def extract_from_part(part):
        nonlocal text_content, html_content
        mime_type = part.get('mimeType', '')
        body = part.get('body', {})

        if mime_type == 'text/plain' and body.get('data'):
            text_content = base64.urlsafe_b64decode(body['data']).decode('utf-8', errors='ignore')
        elif mime_type == 'text/html' and body.get('data'):
            html_content = base64.urlsafe_b64decode(body['data']).decode('utf-8', errors='ignore')
        elif 'parts' in part:
            for subpart in part['parts']:
                extract_from_part(subpart)

    extract_from_part(payload)
    return text_content, html_content


@server.tool()
@handle_http_errors("get_gmail_message_content_with_attachments", is_read_only=True, service_type="gmail")
@require_google_service("gmail", "gmail_read")
async def get_gmail_message_content_with_attachments(
    service, message_id: str, user_google_email: str
) -> Dict[str, Any]:
    """
    Retrieve the full content of a Gmail message, including attachment info.

    Args:
        service: The Gmail API service object.
        message_id (str): The unique ID of the Gmail message to retrieve.
        user_google_email (str): The user's Google email address. Required.

    Returns:
        dict: Subject, sender, recipient, text/html bodies, attachment
        metadata list, snippet and thread ID.
    """
    logger.info(
        f"[get_gmail_message_content_with_attachments] Message ID: '{message_id}', Email: '{user_google_email}'"
    )

    # A single "full" fetch returns headers, body parts and attachment metadata.
    message_full = await asyncio.to_thread(
        service.users()
        .messages()
        .get(
            userId="me",
            id=message_id,
            format="full",
        )
        .execute
    )

    headers = {
        h["name"]: h["value"]
        for h in message_full.get("payload", {}).get("headers", [])
    }
    subject = headers.get("Subject", "(no subject)")
    sender = headers.get("From", "(unknown sender)")
    recipient = headers.get("To", "(unknown recipient)")

    payload = message_full.get("payload", {})
    text_body, html_body = _extract_body_content(payload)

    # Fall back to the BFS extractor when the two-slot extractor finds nothing.
    if not text_body:
        text_body = _extract_message_body(payload)

    attachments = _extract_attachments(payload, message_id)

    return {
        "message_id": message_id,
        "subject": subject,
        "sender": sender,
        "recipient": recipient,
        "text_body": text_body or '[No text body found]',
        "html_body": html_body,
        "attachments": attachments,
        "attachment_count": len(attachments),
        "snippet": message_full.get("snippet", ""),
        "thread_id": message_full.get("threadId", "")
    }


@server.tool()
@handle_http_errors("get_gmail_message_content", is_read_only=True, service_type="gmail")
@require_google_service("gmail", "gmail_read")
async def get_gmail_message_content(
    service, message_id: str, user_google_email: str
) -> str:
    """
    Retrieve subject, sender and plain-text body of a specific Gmail message.

    Kept for backward compatibility with earlier callers.

    Args:
        message_id (str): The unique ID of the Gmail message to retrieve.
        user_google_email (str): The user's Google email address. Required.

    Returns:
        str: The message details including subject, sender, and body content.
    """
    logger.info(
        f"[get_gmail_message_content] Invoked. Message ID: '{message_id}', Email: '{user_google_email}'"
    )

    logger.info(f"[get_gmail_message_content] Using service for: {user_google_email}")

    # One format="full" request already carries the headers, so the previous
    # extra metadata-only round-trip was redundant and has been removed.
    message_full = await asyncio.to_thread(
        service.users()
        .messages()
        .get(
            userId="me",
            id=message_id,
            format="full",  # Full payload: headers + body parts
        )
        .execute
    )

    headers = {
        h["name"]: h["value"]
        for h in message_full.get("payload", {}).get("headers", [])
    }
    subject = headers.get("Subject", "(no subject)")
    sender = headers.get("From", "(unknown sender)")

    # Extract the plain text body using the shared helper
    payload = message_full.get("payload", {})
    body_data = _extract_message_body(payload)

    content_text = "\n".join(
        [
            f"Subject: {subject}",
            f"From: {sender}",
            f"\n--- BODY ---\n{body_data or '[No text/plain body found]'}",
        ]
    )
    return content_text
@server.tool()
@handle_http_errors("download_gmail_attachment", is_read_only=False, service_type="gmail")
@require_google_service("gmail", "gmail_read")
async def download_gmail_attachment(
    service,
    message_id: str,
    attachment_id: str,
    user_google_email: str,
    save_path: Optional[str] = None,
    max_size_mb: int = 100
) -> Dict[str, Any]:
    """
    Download an email attachment from Gmail.

    Args:
        service: The Gmail API service object.
        message_id (str): The message ID containing the attachment.
        attachment_id (str): The attachment ID to download.
        user_google_email (str): The user's Google email address.
        save_path (str, optional): Path (under the working directory) to save the file.
        max_size_mb (int): Maximum attachment size in MB (default: 100).

    Returns:
        dict: Attachment metadata plus base64-encoded data; when saved to
        disk, also the resolved ``saved_path``.

    Raises:
        Exception: If the attachment exceeds ``max_size_mb``, the save path
            escapes the working directory, or the API call fails.
    """
    logger.info(
        f"[download_gmail_attachment] Message ID: '{message_id}', Attachment ID: '{attachment_id}'"
    )

    try:
        attachment = await asyncio.to_thread(
            service.users()
            .messages()
            .attachments()
            .get(
                userId="me",
                messageId=message_id,
                id=attachment_id
            )
            .execute
        )

        # NOTE: attachments().get() already returns the full payload, so this
        # size check protects the caller and disk, not the network transfer.
        attachment_size = int(attachment.get('size', 0))
        max_size_bytes = max_size_mb * 1024 * 1024

        if attachment_size > max_size_bytes:
            raise Exception(f"Attachment too large: {attachment_size / (1024*1024):.1f}MB exceeds limit of {max_size_mb}MB")

        # Gmail returns attachment bytes as URL-safe base64
        file_data = base64.urlsafe_b64decode(attachment['data'])

        result = {
            'attachment_id': attachment_id,
            'message_id': message_id,
            'size': attachment.get('size', 0),  # .get: 'size' is not guaranteed present
            'data_base64': base64.b64encode(file_data).decode('utf-8')  # JSON-serializable payload
        }

        if save_path:
            # Resolve and confine the target path to the working directory to
            # block path traversal via crafted save_path values.
            save_path = Path(save_path).resolve()

            base_dir = Path.cwd().resolve()
            try:
                save_path.relative_to(base_dir)
            except ValueError:
                raise Exception("Invalid file path - path traversal not allowed")

            save_path.parent.mkdir(parents=True, exist_ok=True)

            with open(save_path, 'wb') as f:
                f.write(file_data)

            result['saved_path'] = str(save_path)
            logger.info(f"Attachment saved to {save_path}")

        logger.info(f"Successfully downloaded attachment: {attachment_size} bytes")
        return result

    except Exception as e:
        logger.error(f"Failed to download attachment: {str(e)}")
        # Chain the cause so the original traceback is preserved.
        raise Exception(f"Unable to download attachment: {str(e)}") from e


@server.tool()
@handle_http_errors("list_gmail_message_attachments", is_read_only=True, service_type="gmail")
@require_google_service("gmail", "gmail_read")
async def list_gmail_message_attachments(
    service,
    message_id: str,
    user_google_email: str
) -> List[Dict[str, Any]]:
    """
    List all attachments in a Gmail message.

    Args:
        service: The Gmail API service object.
        message_id (str): The message ID to check for attachments.
        user_google_email (str): The user's Google email address.

    Returns:
        list: Attachment metadata dicts (see ``_extract_attachments``).
    """
    logger.info(f"[list_gmail_message_attachments] Message ID: '{message_id}'")

    message_full = await asyncio.to_thread(
        service.users()
        .messages()
        .get(
            userId="me",
            id=message_id,
            format="full",
        )
        .execute
    )

    payload = message_full.get("payload", {})
    attachments = _extract_attachments(payload, message_id)

    logger.info(f"Found {len(attachments)} attachments in message {message_id}")
    return attachments


def _read_pdf_content(file_data: bytes) -> str:
    """
    Extract text (and tables) from PDF bytes using pdfplumber.

    Never raises: per-page failures and whole-file failures are reported as
    strings embedded in the returned content.
    """
    if not PDF_AVAILABLE:
        return "PDF reading not available. Please install: pip install pdfplumber"

    try:
        pdf_file = io.BytesIO(file_data)
        text_content = []

        with pdfplumber.open(pdf_file) as pdf:
            for page_num, page in enumerate(pdf.pages):
                try:
                    page_text = page.extract_text()
                    if page_text and page_text.strip():
                        text_content.append(f"--- Page {page_num + 1} ---\n{page_text}")

                    # Tables are rendered as pipe-separated rows per page.
                    tables = page.extract_tables()
                    if tables:
                        for table_num, table in enumerate(tables):
                            if table:
                                text_content.append(f"--- Page {page_num + 1} Table {table_num + 1} ---")
                                for row in table:
                                    if row:
                                        text_content.append(" | ".join(str(cell) if cell else "" for cell in row))
                except Exception as e:
                    # A corrupt page should not abort the whole document.
                    text_content.append(f"--- Page {page_num + 1} (Error reading page) ---\nError: {str(e)}")

        return "\n\n".join(text_content) if text_content else "No text content found in PDF"

    except Exception as e:
        return f"Error reading PDF: {str(e)}"
Please install: pip install pdfplumber" + + try: + pdf_file = io.BytesIO(file_data) + text_content = [] + + with pdfplumber.open(pdf_file) as pdf: + for page_num, page in enumerate(pdf.pages): + try: + page_text = page.extract_text() + if page_text and page_text.strip(): + text_content.append(f"--- Page {page_num + 1} ---\n{page_text}") + + # Also extract tables if present + tables = page.extract_tables() + if tables: + for table_num, table in enumerate(tables): + if table: + text_content.append(f"--- Page {page_num + 1} Table {table_num + 1} ---") + for row in table: + if row: + text_content.append(" | ".join(str(cell) if cell else "" for cell in row)) + except Exception as e: + text_content.append(f"--- Page {page_num + 1} (Error reading page) ---\nError: {str(e)}") + + return "\n\n".join(text_content) if text_content else "No text content found in PDF" + + except Exception as e: + return f"Error reading PDF: {str(e)}" + + +def _read_docx_content(file_data: bytes) -> str: + """Extract text content from DOCX bytes using mammoth""" + if not DOCX_AVAILABLE: + return "DOCX reading not available. Please install: pip install mammoth" + + try: + docx_file = io.BytesIO(file_data) + + # Extract raw text + result = mammoth.extract_raw_text(docx_file) + text_content = result.value.strip() if result.value else "" + + # Check for conversion messages/warnings + if result.messages: + warnings = [msg.message for msg in result.messages] + if warnings: + text_content += "\n\n--- Conversion Notes ---\n" + "\n".join(warnings) + + return text_content if text_content else "No text content found in DOCX" + + except Exception as e: + return f"Error reading DOCX: {str(e)}" + + +def _read_xlsx_content(file_data: bytes) -> str: + """Extract text content from Excel files using pyxlsb and pandas""" + if not EXCEL_AVAILABLE: + return "Excel reading not available. 
Please install: pip install pyxlsb pandas" + + try: + xlsx_file = io.BytesIO(file_data) + + # Try to detect file format + xlsx_file.seek(0) + header = xlsx_file.read(8) + xlsx_file.seek(0) + + content = [] + + # Handle .xlsb files with pyxlsb + if b'Microsoft' in header or file_data.startswith(b'\x09\x08\x04\x00'): + try: + # Read XLSB file using pyxlsb + with pyxlsb.open_workbook(xlsx_file) as wb: + for sheet_name in wb.sheets: + content.append(f"--- Sheet: {sheet_name} ---") + + rows = [] + with wb.get_sheet(sheet_name) as sheet: + for row in sheet.rows(): + if row: + row_text = "\t".join(str(cell.v) if cell and cell.v is not None else "" for cell in row) + if row_text.strip(): + rows.append(row_text) + + if rows: + content.append("\n".join(rows)) + else: + content.append("No data in this sheet") + + except Exception as e: + # Fallback to pandas for regular Excel files + content = [] + excel_data = pd.read_excel(xlsx_file, sheet_name=None, engine='openpyxl') + + for sheet_name, df in excel_data.items(): + content.append(f"--- Sheet: {sheet_name} ---") + + if not df.empty: + sheet_content = df.to_string(index=False, na_rep='') + content.append(sheet_content) + else: + content.append("No data in this sheet") + else: + # Handle regular Excel files with pandas + excel_data = pd.read_excel(xlsx_file, sheet_name=None, engine='openpyxl') + + for sheet_name, df in excel_data.items(): + content.append(f"--- Sheet: {sheet_name} ---") + + if not df.empty: + sheet_content = df.to_string(index=False, na_rep='') + content.append(sheet_content) + else: + content.append("No data in this sheet") + + return "\n\n".join(content) if content else "No content found in Excel file" + + except Exception as e: + return f"Error reading Excel file: {str(e)}" + + +def _read_csv_content(file_data: bytes) -> str: + """Extract text content from CSV bytes""" + if not CSV_AVAILABLE: + return "CSV reading not available" + + try: + csv_text = file_data.decode('utf-8', errors='ignore') + csv_file = 
def _read_html_content(file_data: bytes) -> str:
    """
    Extract readable text from HTML bytes using BeautifulSoup.

    Scripts and styles are stripped and whitespace is collapsed. Never
    raises; failures are returned as error strings.
    """
    if not HTML_AVAILABLE:
        return "HTML reading not available. Please install: pip install beautifulsoup4"

    try:
        html_text = file_data.decode('utf-8', errors='ignore')
        soup = BeautifulSoup(html_text, 'html.parser')

        # Drop non-visible content before extracting text.
        for script in soup(["script", "style"]):
            script.decompose()

        text = soup.get_text()

        # Collapse whitespace runs into single spaces.
        lines = (line.strip() for line in text.splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
        text = ' '.join(chunk for chunk in chunks if chunk)

        return text if text else "No text content found in HTML"

    except Exception as e:
        return f"Error reading HTML: {str(e)}"


def _read_text_content(file_data: bytes, encoding: str = 'utf-8') -> str:
    """
    Decode plain-text bytes, trying several common encodings in order.

    Note: latin-1 decodes any byte sequence, so the loop effectively always
    succeeds by the third attempt; the lossy fallback is a safety net.
    """
    try:
        encodings = [encoding, 'utf-8', 'latin-1', 'cp1252']

        for enc in encodings:
            try:
                return file_data.decode(enc)
            except UnicodeDecodeError:
                continue

        # Last resort: decode lossily rather than fail.
        return file_data.decode('utf-8', errors='ignore')

    except Exception as e:
        # e.g. LookupError from an unknown `encoding` argument
        return f"Error reading text file: {str(e)}"


def _read_json_content(file_data: bytes) -> str:
    """Parse JSON bytes and return them pretty-printed (2-space indent)."""
    try:
        json_text = file_data.decode('utf-8', errors='ignore')
        json_data = json.loads(json_text)
        return json.dumps(json_data, indent=2, ensure_ascii=False)
    except Exception as e:
        return f"Error reading JSON: {str(e)}"


@server.tool()
@handle_http_errors("read_gmail_attachment_content", is_read_only=True, service_type="gmail")
@require_google_service("gmail", "gmail_read")
async def read_gmail_attachment_content(
    service,
    message_id: str,
    attachment_name: str,
    user_google_email: str,
    max_size_mb: int = 50
) -> Dict[str, Any]:
    """
    Download a Gmail attachment and extract its textual content.

    Supports: PDF, DOCX, XLSX, XLSB, CSV, HTML, TXT, JSON, and other
    text-based files.

    Args:
        service: The Gmail API service object.
        message_id (str): The message ID containing the attachment.
        attachment_name (str): Filename of the attachment to read.
        user_google_email (str): The user's Google email address.
        max_size_mb (int): Maximum attachment size in MB (default: 50).

    Returns:
        dict: Attachment metadata plus extracted ``content`` on success, or
        ``{'success': False, 'error': ...}`` on failure.
    """
    logger.info(
        f"[read_gmail_attachment_content] Message ID: '{message_id}', Attachment name: '{attachment_name}'"
    )

    attachment_id = None  # defined up-front so the error path below can report it

    try:
        # Fetch the full message to locate the attachment's metadata.
        message_full = await asyncio.to_thread(
            service.users()
            .messages()
            .get(
                userId="me",
                id=message_id,
                format="full",
            )
            .execute
        )
        logger.info(f"Retrieved full message for ID: {message_id} with attachment name '{attachment_name}'")

        payload = message_full.get("payload", {})
        attachments = _extract_attachments(payload, message_id)

        attachment_info = None
        for att in attachments:
            if att['filename'] == attachment_name:
                attachment_info = att
                break

        # Resolve the attachment BEFORE dereferencing it — the previous code
        # subscripted attachment_info while it could still be None, which
        # raised TypeError and made this fallback unreachable.
        if not attachment_info:
            logger.error(
                f"Attachment '{attachment_name}' not found. Available attachments: {[att['filename'] for att in attachments]}"
            )
            if attachments:
                # Best-effort fallback: use the first attachment present.
                logger.info(f"Using first available attachment: {attachments[0]['filename']}")
                attachment_info = attachments[0]
            else:
                raise Exception(f"Attachment {attachment_name} not found in message {message_id}")

        attachment_id = attachment_info['attachment_id']

        # Download the attachment payload.
        attachment = await asyncio.to_thread(
            service.users()
            .messages()
            .attachments()
            .get(
                userId="me",
                messageId=message_id,
                id=attachment_id
            )
            .execute
        )

        # Validate file size before decoding/extracting.
        attachment_size = int(attachment.get('size', 0))
        max_size_bytes = max_size_mb * 1024 * 1024

        if attachment_size > max_size_bytes:
            raise Exception(f"Attachment too large: {attachment_size / (1024*1024):.1f}MB exceeds limit of {max_size_mb}MB")

        file_data = base64.urlsafe_b64decode(attachment['data'])

        # Dispatch on file extension first, MIME type second.
        filename = attachment_info['filename'].lower()
        mime_type = attachment_info['mime_type'].lower()
        content = ""
        file_type = "unknown"

        if filename.endswith('.pdf') or 'pdf' in mime_type:
            file_type = "pdf"
            content = _read_pdf_content(file_data)

        elif filename.endswith('.docx') or 'wordprocessingml' in mime_type:
            file_type = "docx"
            content = _read_docx_content(file_data)

        elif filename.endswith(('.xlsx', '.xls', '.xlsb')) or 'spreadsheetml' in mime_type:
            file_type = "xlsx"
            content = _read_xlsx_content(file_data)

        elif filename.endswith('.csv') or mime_type == 'text/csv':
            file_type = "csv"
            content = _read_csv_content(file_data)

        elif filename.endswith(('.html', '.htm')) or mime_type == 'text/html':
            file_type = "html"
            content = _read_html_content(file_data)

        elif filename.endswith('.json') or mime_type == 'application/json':
            file_type = "json"
            content = _read_json_content(file_data)

        elif (filename.endswith(('.txt', '.log', '.md', '.py', '.js', '.css', '.xml')) or
              mime_type.startswith('text/')):
            file_type = "text"
            content = _read_text_content(file_data)

        else:
            # Unknown type: try text unless the extension is clearly binary.
            if not any(ext in filename for ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp',
                                                   '.exe', '.zip', '.rar', '.7z']):
                file_type = "text"
                content = _read_text_content(file_data)
            else:
                content = f"Cannot read content from (unknown). File type not supported for content extraction."
                file_type = "binary"

        result = {
            'message_id': message_id,
            'attachment_id': attachment_id,
            'filename': attachment_info['filename'],
            'mime_type': attachment_info['mime_type'],
            'size': attachment_size,
            'size_mb': round(attachment_size / (1024 * 1024), 2),
            'file_type': file_type,
            'content': content,
            'content_length': len(content),
            'success': True
        }

        logger.info(f"Successfully extracted content from {attachment_info['filename']} ({file_type}): {len(content)} characters")
        return result

    except Exception as e:
        logger.error(f"Failed to read attachment content: {str(e)}")
        return {
            'message_id': message_id,
            'attachment_id': attachment_id,  # None when lookup failed early (was an unbound-name NameError before)
            'error': str(e),
            'success': False
        }
@server.tool()
@handle_http_errors("read_all_gmail_message_attachments", is_read_only=True, service_type="gmail")
@require_google_service("gmail", "gmail_read")
async def read_all_gmail_message_attachments(
    service,
    message_id: str,
    user_google_email: str,
    max_size_mb: int = 50
) -> List[Dict[str, Any]]:
    """
    Read the content of every attachment in a Gmail message.

    Args:
        service: The Gmail API service object.
        message_id (str): The message ID to read attachments from.
        user_google_email (str): The user's Google email address.
        max_size_mb (int): Maximum attachment size in MB per file (default: 50).

    Returns:
        list: Per-attachment result dicts from read_gmail_attachment_content;
        individual failures are reported as ``{'success': False, ...}`` entries.
    """
    logger.info(f"[read_all_gmail_message_attachments] Message ID: '{message_id}'")

    try:
        # NOTE(review): this calls sibling @server.tool functions directly —
        # confirm the tool decorator leaves them callable as plain coroutines.
        attachments = await list_gmail_message_attachments(service, message_id, user_google_email)

        if not attachments:
            logger.info(f"No attachments found in message {message_id}")
            return []

        results = []
        for attachment in attachments:
            try:
                # BUG FIX: read_gmail_attachment_content takes attachment_name,
                # not attachment_id — the old keyword raised TypeError on every call.
                result = await read_gmail_attachment_content(
                    service=service,
                    message_id=message_id,
                    attachment_name=attachment['filename'],
                    user_google_email=user_google_email,
                    max_size_mb=max_size_mb
                )
                results.append(result)

            except Exception as e:
                # One bad attachment must not abort the rest.
                logger.error(f"Failed to read attachment {attachment['filename']}: {str(e)}")
                results.append({
                    'message_id': message_id,
                    'attachment_id': attachment['attachment_id'],
                    'filename': attachment['filename'],
                    'error': str(e),
                    'success': False
                })

        return results

    except Exception as e:
        logger.error(f"Failed to read message attachments: {str(e)}")
        raise Exception(f"Unable to read message attachments: {str(e)}") from e
@server.tool()
@handle_http_errors("download_all_attachments_from_message", is_read_only=False, service_type="gmail")
@require_google_service("gmail", "gmail_read")
async def download_all_attachments_from_message(
    service,
    message_id: str,
    user_google_email: str,
    download_dir: str = "downloads",
    max_size_mb: int = 100
) -> List[Dict[str, Any]]:
    """
    Download all attachments from a Gmail message into a directory.

    Args:
        service: The Gmail API service object.
        message_id (str): The message ID.
        user_google_email (str): The user's email.
        download_dir (str): Directory to save attachments into.
        max_size_mb (int): Max size per attachment in MB.

    Returns:
        list: Per-attachment download results; failures carry an ``error`` key.
    """
    # NOTE(review): calls sibling @server.tool functions directly — confirm
    # the tool decorator leaves them callable as plain coroutines.
    attachments = await list_gmail_message_attachments(service, message_id, user_google_email)

    if not attachments:
        logger.info(f"No attachments found in message {message_id}")
        return []

    download_path = Path(download_dir)
    download_path.mkdir(exist_ok=True)

    results = []
    for attachment in attachments:
        try:
            filename = attachment['filename']
            # Keep only alphanumerics, space, dot, underscore and dash so the
            # write cannot escape download_path.
            safe_filename = "".join(c for c in filename if c.isalnum() or c in (' ', '.', '_', '-')).strip()
            if not safe_filename:
                # A name made entirely of stripped characters would collapse
                # to "" and target the directory itself — fall back to the ID.
                safe_filename = attachment['attachment_id']
            # NOTE(review): identical sanitized names overwrite each other —
            # confirm whether per-file uniquing is wanted.
            file_path = download_path / safe_filename

            result = await download_gmail_attachment(
                service=service,
                message_id=message_id,
                attachment_id=attachment['attachment_id'],
                user_google_email=user_google_email,
                save_path=str(file_path),
                max_size_mb=max_size_mb
            )

            result['original_filename'] = filename
            results.append(result)

        except Exception as e:
            # Record the failure and continue with the remaining attachments.
            logger.error(f"Failed to download attachment {attachment['filename']}: {str(e)}")
            results.append({
                'attachment_id': attachment['attachment_id'],
                'original_filename': attachment['filename'],
                'error': str(e)
            })

    return results
diff --git a/pyproject.toml b/pyproject.toml index 4b9ab182..003baee9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,13 +10,18 @@ readme = "README.md" keywords = [ "mcp", "google", "workspace", "llm", "ai", "claude", "model", "context", "protocol", "server"] requires-python = ">=3.10" dependencies = [ + "beautifulsoup4>=4.13.4", "fastapi>=0.115.12", "fastmcp>=2.10.6", "google-api-python-client>=2.168.0", "google-auth-httplib2>=0.2.0", "google-auth-oauthlib>=1.2.2", "httpx>=0.28.1", + "mammoth>=1.10.0", + "pandas>=2.3.1", + "pdfplumber>=0.11.7", "pyjwt>=2.10.1", + "pyxlsb>=1.0.10", "ruff>=0.12.4", "tomlkit", ] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..30ab1415 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,186 @@ +aiohappyeyeballs==2.4.4 +aiohttp==3.11.11 +aiosignal==1.3.2 +annotated-types==0.7.0 +anthropic==0.45.2 +anyio==4.9.0 +attrs==25.1.0 +Authlib==1.6.1 +bidict==0.23.1 +blinker==1.9.0 +CacheControl==0.14.3 +cachetools==5.5.2 +certifi==2025.1.31 +cffi==1.17.1 +charset-normalizer==3.4.1 +click==8.1.8 +cloudpickle==3.1.1 +cryptography==45.0.5 +cyclopts==3.22.5 +dataclasses-json==0.6.7 +defusedxml==0.7.1 +distro==1.9.0 +dnspython==2.7.0 +docstring_parser==0.17.0 +docutils==0.22 +email_validator==2.2.0 +et_xmlfile==2.0.0 +exceptiongroup==1.3.0 +fastapi==0.116.1 +fastmcp==2.11.1 +firebase_admin==7.1.0 +Flask==3.1.0 +Flask-Cors==5.0.0 +Flask-SocketIO==5.5.1 +frozenlist==1.5.0 +google-adk==1.8.0 +google-api-core==2.25.1 +google-api-python-client==2.177.0 +google-auth==2.40.3 +google-auth-httplib2==0.2.0 +google-auth-oauthlib==1.2.2 +google-cloud-aiplatform==1.105.0 +google-cloud-appengine-logging==1.6.2 +google-cloud-audit-log==0.3.2 +google-cloud-bigquery==3.35.1 +google-cloud-core==2.4.3 +google-cloud-firestore==2.21.0 +google-cloud-logging==3.12.1 +google-cloud-resource-manager==1.14.2 +google-cloud-secret-manager==2.24.0 +google-cloud-speech==2.33.0 +google-cloud-storage==2.19.0 +google-cloud-trace==1.16.2 
+google-crc32c==1.7.1 +google-genai==1.27.0 +google-resumable-media==2.7.2 +googleapis-common-protos==1.70.0 +graphviz==0.21 +grpc-google-iam-v1==0.14.2 +grpcio==1.74.0 +grpcio-status==1.74.0 +h11==0.14.0 +h2==4.2.0 +hpack==4.1.0 +httpcore==1.0.7 +httplib2==0.22.0 +httpx==0.28.1 +httpx-sse==0.4.0 +hyperframe==6.1.0 +idna==3.10 +importlib_metadata==8.7.0 +isodate==0.7.2 +itsdangerous==2.2.0 +Jinja2==3.1.5 +jiter==0.8.2 +jsonpatch==1.33 +jsonpointer==3.0.0 +jsonschema==4.25.0 +jsonschema-path==0.3.4 +jsonschema-specifications==2025.4.1 +langchain==0.3.17 +langchain-anthropic==0.3.5 +langchain-community==0.3.16 +langchain-core==0.3.33 +langchain-text-splitters==0.3.5 +langsmith==0.3.4 +lazy-object-proxy==1.11.0 +markdown-it-py==3.0.0 +MarkupSafe==3.0.2 +marshmallow==3.26.1 +mcp==1.12.2 +mdurl==0.1.2 +more-itertools==10.7.0 +msgpack==1.1.1 +multidict==6.1.0 +mypy-extensions==1.0.0 +numpy==2.2.3 +oauthlib==3.3.1 +openai==1.61.1 +openapi-core==0.19.5 +openapi-pydantic==0.5.1 +openapi-schema-validator==0.6.3 +openapi-spec-validator==0.7.2 +openpyxl==3.1.5 +opentelemetry-api==1.35.0 +opentelemetry-exporter-gcp-trace==1.9.0 +opentelemetry-resourcedetector-gcp==1.9.0a0 +opentelemetry-sdk==1.35.0 +opentelemetry-semantic-conventions==0.56b0 +orjson==3.10.15 +outcome==1.3.0.post0 +packaging==24.2 +pandas==2.2.3 +parse==1.20.2 +pathable==0.4.4 +pinecone-client==5.0.1 +pinecone-plugin-inference==1.1.0 +pinecone-plugin-interface==0.0.7 +propcache==0.2.1 +proto-plus==1.26.1 +protobuf==6.31.1 +pyasn1==0.6.1 +pyasn1_modules==0.4.2 +pycparser==2.22 +pydantic==2.11.7 +pydantic-settings==2.7.1 +pydantic_core==2.33.2 +Pygments==2.19.2 +PyJWT==2.10.1 +pymongo==4.11.1 +PyMuPDF==1.25.3 +pyparsing==3.2.3 +pypdf==5.2.0 +pyperclip==1.9.0 +PySocks==1.7.1 +python-dateutil==2.9.0.post0 +python-dotenv==1.1.1 +python-engineio==4.11.2 +python-multipart==0.0.20 +python-socketio==5.12.1 +pytz==2025.1 +PyYAML==6.0.2 +referencing==0.36.2 +requests==2.32.4 +requests-oauthlib==2.0.0 
+requests-toolbelt==1.0.0 +rfc3339-validator==0.1.4 +rich==14.1.0 +rich-rst==1.3.1 +rpds-py==0.26.0 +rsa==4.9.1 +ruff==0.12.7 +schedule==1.2.2 +selenium==4.28.1 +setuptools==75.8.2 +shapely==2.1.1 +simple-websocket==1.1.0 +six==1.17.0 +sniffio==1.3.1 +sortedcontainers==2.4.0 +SQLAlchemy==2.0.37 +sse-starlette==3.0.2 +starlette==0.47.2 +tenacity==8.5.0 +tomlkit==0.13.3 +tqdm==4.67.1 +trio==0.28.0 +trio-websocket==0.11.1 +typing-inspect==0.9.0 +typing-inspection==0.4.1 +typing_extensions==4.12.2 +tzdata==2025.1 +tzlocal==5.3.1 +uritemplate==4.2.0 +urllib3==2.3.0 +uvicorn==0.34.0 +watchdog==6.0.0 +webdriver-manager==4.0.2 +websocket-client==1.8.0 +websockets==15.0.1 +Werkzeug==3.1.1 +-e git+https://github.com/taylorwilsdon/google_workspace_mcp.git@6fcefd599bbf5b430992bec31bd204e8d9982992#egg=workspace_mcp +wsproto==1.2.0 +yarl==1.18.3 +zipp==3.23.0 +zstandard==0.23.0