# Source: mxgo-core/mxtoai/api.py (mxgoai/mxgo-core @ d6e89a0ab69477ec92a0dfb2e276d6da6ce4d77e)

import json
import os
import shutil
from datetime import datetime
from email.utils import getaddresses
from pathlib import Path
from typing import Annotated, Any, Optional

import aiofiles
from dotenv import load_dotenv
from fastapi import Depends, FastAPI, File, Form, HTTPException, Response, UploadFile, status
from fastapi.security import APIKeyHeader

from mxtoai._logging import get_logger
from mxtoai.agents.email_agent import EmailAgent
from mxtoai.config import ATTACHMENTS_DIR, SKIP_EMAIL_DELIVERY
from mxtoai.dependencies import processing_instructions_resolver
from mxtoai.email_sender import (
    generate_email_id,
    send_email_reply,
)
from mxtoai.schemas import EmailAttachment, EmailRequest
from mxtoai.tasks import process_email_task
from mxtoai.validators import validate_api_key, validate_attachments, validate_email_handle, validate_email_whitelist

# Load environment variables (must happen before IS_PROD is read below)
load_dotenv()

# Configure logging
logger = get_logger(__name__)

# IS_PROD is read with os.environ[...] on purpose: a missing variable raises
# KeyError at import time instead of silently starting with an unintended mode.
_is_prod = os.environ["IS_PROD"].lower() == "true"

# The OpenAPI schema route (and the docs pages that depend on it) are registered
# while FastAPI() is being constructed, so the schema has to be disabled through
# the constructor argument. Assigning app.openapi_url = None afterwards leaves
# the already-registered schema route reachable in production.
app = FastAPI(openapi_url=None if _is_prod else "/openapi.json")

# Every request must carry an "x-api-key" header; auto_error=True makes the
# dependency reject requests that omit it.
api_auth_scheme = APIKeyHeader(name="x-api-key", auto_error=True)

# Create the email agent on startup
email_agent = EmailAgent(attachment_dir=ATTACHMENTS_DIR, verbose=True, enable_deep_research=True)


# Function to cleanup attachment files and directory
def cleanup_attachments(directory_path: str) -> bool:
    """
    Remove an attachment directory together with everything inside it

    A path that does not exist is treated as already cleaned up.

    Args:
        directory_path (str): Path to the directory to be deleted

    Returns:
        bool: True if nothing went wrong (including "nothing to delete"), False if deletion raised
    """
    try:
        directory_present = os.path.exists(directory_path)
        if directory_present:
            shutil.rmtree(directory_path)
            logger.info(f"Deleted attachment directory: {directory_path}")
    except Exception as exc:
        # Best-effort cleanup: report the failure to the caller instead of raising
        logger.error(f"Error deleting attachment directory {directory_path}: {exc!s}")
        return False
    else:
        return True


def create_success_response(
    summary: str, email_response: dict[str, Any], attachment_info: list[dict[str, Any]]
) -> Response:
    """
    Build the HTTP 200 JSON response returned once an email was processed and answered

    Args:
        summary (str): Summary of the email processing
        email_response (dict): Response from the email sending service; its "MessageId" is echoed back
        attachment_info (list): List of processed attachments (only its length is reported)

    Returns:
        Response: FastAPI Response object with JSON content
    """
    payload = {
        "message": "Email processed and reply sent",
        "summary": summary,
        # Fall back to an empty string when the sender returned no message id
        "email_id": email_response.get("MessageId", ""),
        "attachments_saved": len(attachment_info),
        "attachments_deleted": True,
    }
    body = json.dumps(payload)
    return Response(content=body, status_code=status.HTTP_200_OK, media_type="application/json")


def create_error_response(summary: str, attachment_info: list[dict[str, Any]], error: str) -> Response:
    """
    Build the HTTP 500 JSON response used when processing worked but the reply was not sent

    Args:
        summary (str): Summary of the email processing
        attachment_info (list): List of processed attachments (only its length is reported)
        error (str): Error message; converted with str() before serialisation

    Returns:
        Response: FastAPI Response object with JSON content
    """
    payload = {
        "message": "Email processed but reply could not be sent",
        "summary": summary,
        "attachments_saved": len(attachment_info),
        "attachments_deleted": True,
        "error": str(error),
    }
    body = json.dumps(payload)
    return Response(
        content=body,
        status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        media_type="application/json",
    )


# Helper function to handle uploaded files
async def handle_file_attachments(
    attachments: list[EmailAttachment], email_id: str, email_data: EmailRequest
) -> tuple[str, list[dict[str, Any]]]:
    """
    Process uploaded files and save them as attachments

    Each attachment is written to ``ATTACHMENTS_DIR/<email_id>/<filename>`` and, once
    saved, recorded both in the returned info list and on ``email_data.attachments``.
    An unexpected error on a single file is logged and that file is skipped; a
    validation error (empty content, blocked content type) aborts the whole request.

    Args:
        attachments (list[EmailAttachment]): List of EmailAttachment objects
        email_id (str): Unique identifier for the email
        email_data (EmailRequest): EmailRequest object containing email details

    Returns:
        tuple[str, list[dict[str, Any]]]: Tuple containing the directory path and list of processed attachments.
        The directory path is an empty string when nothing was saved.

    Raises:
        HTTPException: 400 when an attachment fails validation; the attachment
            directory created for this email is removed before raising
    """
    email_attachments_dir = ""
    attachment_info: list[dict[str, Any]] = []

    if not attachments:
        logger.debug("No files to process")
        return email_attachments_dir, attachment_info

    # Create directory for this email's attachments using pathlib
    email_attachments_dir = str(Path(ATTACHMENTS_DIR) / email_id)
    Path(email_attachments_dir).mkdir(parents=True, exist_ok=True)
    logger.info(f"Created attachments directory: {email_attachments_dir}")

    # Process each attachment
    for idx, attachment in enumerate(attachments):
        # Reset for every file: the cleanup branch below must never see the path of a
        # previously (and successfully) saved attachment, or it would delete that file.
        storage_path: Optional[str] = None
        try:
            # Log file details
            logger.info(
                f"Processing file {idx + 1}/{len(attachments)}: {attachment.filename} ({attachment.contentType})"
            )

            # Validate file size
            if not attachment.content:
                logger.error(f"Empty content received for file: {attachment.filename}")
                msg = "Empty attachment"
                raise ValueError(msg)

            # Validate file type
            if attachment.contentType in ["application/x-msdownload", "application/x-executable"]:
                logger.error(f"Unsupported file type: {attachment.contentType}")
                msg = "Unsupported file type"
                raise ValueError(msg)

            # Sanitize filename for storage (drop any directory components)
            safe_filename = Path(attachment.filename).name
            if not safe_filename:
                safe_filename = f"attachment_{idx}.bin"
                logger.warning(f"Using generated filename for attachment {idx}: {safe_filename}")

            # Truncate overly long filenames: keep the first 95 characters, then re-append the extension
            if len(safe_filename) > 100:
                ext = Path(safe_filename).suffix
                safe_filename = safe_filename[:95] + ext
                logger.warning(f"Truncated long filename to: {safe_filename}")

            # Full path to save the attachment
            storage_path = str(Path(email_attachments_dir) / safe_filename)
            logger.debug(f"Will save file to: {storage_path}")

            # Write content to disk
            async with aiofiles.open(storage_path, "wb") as f:
                await f.write(attachment.content)

            # Verify file was saved correctly
            if not Path(storage_path).exists():
                msg = f"Failed to save file: {storage_path}"
                raise OSError(msg)

            file_size = Path(storage_path).stat().st_size
            if file_size == 0:
                msg = f"Saved file is empty: {storage_path}"
                raise OSError(msg)

            # Build the stored-attachment record first: if it fails, the file is removed
            # below and nothing has been recorded yet. No need to keep content after saving.
            saved_attachment = EmailAttachment(
                filename=safe_filename, contentType=attachment.contentType, size=file_size, path=storage_path
            )

            # Store attachment info with storage path
            attachment_info.append(
                {
                    "filename": safe_filename,
                    "type": attachment.contentType,
                    "path": storage_path,
                    "size": file_size,
                }
            )
            email_data.attachments.append(saved_attachment)

            logger.info(f"Successfully saved attachment: {safe_filename} ({file_size} bytes)")

        except ValueError as e:
            logger.error(f"Validation error for file {attachment.filename}: {e!s}")
            # The request is rejected as a whole and the caller never learns the
            # directory path, so remove what was written so far to avoid leaking files.
            shutil.rmtree(email_attachments_dir, ignore_errors=True)
            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) from e
        except Exception as e:
            logger.exception(f"Error processing file {attachment.filename}: {e!s}")
            # Try to clean up any partially saved file (only if this iteration got as far as choosing a path)
            if storage_path is not None:
                try:
                    if Path(storage_path).exists():
                        Path(storage_path).unlink()
                        logger.info(f"Cleaned up partial file: {storage_path}")
                except Exception as cleanup_error:
                    logger.error(f"Error cleaning up partial file: {cleanup_error!s}")

    # If no attachments were successfully saved, clean up the directory
    if not attachment_info and Path(email_attachments_dir).exists():
        logger.warning("No attachments were successfully saved, cleaning up directory")
        shutil.rmtree(email_attachments_dir)
        email_attachments_dir = ""
    else:
        logger.info(f"Successfully processed {len(attachment_info)} attachments")

    return email_attachments_dir, attachment_info


# Helper function to send email reply using SES
async def send_agent_email_reply(email_data: EmailRequest, processing_result: dict[str, Any]) -> dict[str, Any]:
    """
    Send the generated reply for an email and report what happened

    Failures are never raised to the caller; they are returned as a dict with
    ``status == "error"``.

    Args:
        email_data (EmailRequest): EmailRequest object containing email details
        processing_result (dict): Result of the email processing; must contain "email_content"

    Returns:
        dict: Response details including status ("success", "skipped" or "error"), a timestamp and,
        on success, the message ID
    """

    def _failure(reason: str) -> dict[str, Any]:
        # Uniform shape for every error outcome of this function
        return {"status": "error", "error": reason, "timestamp": datetime.now().isoformat()}

    if not (processing_result and "email_content" in processing_result):
        logger.error("Invalid processing result format")
        return _failure("Invalid processing result format")

    # Skip email delivery for test emails
    sender = email_data.from_email
    if sender in SKIP_EMAIL_DELIVERY:
        logger.info(f"Skipping email delivery for test email: {sender}")
        return {
            "status": "skipped",
            "message": "Email delivery skipped for test email",
            "timestamp": datetime.now().isoformat(),
        }

    # Get email body content, preferring the "enhanced" variant when it is present
    generated = processing_result["email_content"]
    enhanced = generated.get("enhanced", {})
    html_content = enhanced.get("html") or generated.get("html")
    text_content = enhanced.get("text") or generated.get("text")

    # Handle case where no content was generated
    if not text_content:
        logger.error("No email content was generated")
        return _failure("No email content was generated")

    # --- Prepare attachments ---
    attachments_to_send = []
    calendar_data = processing_result.get("calendar_data")
    if calendar_data and calendar_data.get("ics_content"):
        invite = {
            "filename": "invite.ics",
            "content": calendar_data["ics_content"],  # Should be string or bytes
            "mimetype": "text/calendar",
        }
        attachments_to_send.append(invite)
        logger.info("Prepared invite.ics for attachment.")
    # Add logic here if other types of attachments need to be sent back

    # Describe the original email for the sender; the reply swaps the from/to roles
    reply_envelope = {
        "from": email_data.from_email,  # Original sender becomes recipient
        "to": email_data.to,  # Original recipient becomes sender
        "subject": email_data.subject,
        "messageId": email_data.messageId,
        "references": email_data.references,
        "inReplyTo": email_data.messageId,
        "cc": email_data.cc,
    }

    try:
        # Log details including CC
        logger.info(
            f"Sending email reply to {reply_envelope['from']} about '{reply_envelope['subject']}' with CC: {reply_envelope.get('cc')}"
        )

        # --- Pass attachments to send_email_reply ---
        email_response = await send_email_reply(
            original_email=reply_envelope,
            reply_text=text_content,
            reply_html=html_content,
            attachments=attachments_to_send,  # Pass prepared attachments
        )

        outcome = {
            "status": "success",
            "message_id": email_response.get("MessageId", ""),
            "to": reply_envelope["from"],  # Who we're sending to
            "from": reply_envelope["to"],  # Who it appears to be from
            "timestamp": datetime.now().isoformat(),
        }
    except Exception as exc:
        logger.error(f"Error sending email reply: {exc!s}", exc_info=True)
        return _failure(str(exc))
    else:
        logger.info(f"Email sent successfully with message ID: {outcome['message_id']}")
        return outcome


# Helper function to create sanitized response
def sanitize_processing_result(processing_result: dict[str, Any]) -> dict[str, Any]:
    """
    Create a clean response suitable for API return and database storage

    Only metadata, research, an attachment summary and size statistics about the
    generated email are kept; the email bodies themselves are not copied.
    Sections that are present but ``None`` (or not a dict) are treated as empty
    instead of raising.

    Args:
        processing_result (dict): Result of the email processing

    Returns:
        dict: Sanitized response with metadata, research, and attachment info.
        If ``processing_result`` is not a dict, a dict with "error" and "timestamp" is returned instead.
    """
    if not isinstance(processing_result, dict):
        return {"error": "Invalid processing result format", "timestamp": datetime.now().isoformat()}

    def _as_dict(value: Any) -> dict[str, Any]:
        # A key may be present with a None/non-dict value; read it as "no data"
        return value if isinstance(value, dict) else {}

    # Start with metadata which is already clean
    sanitized_result: dict[str, Any] = {"metadata": processing_result.get("metadata", {})}

    # Include research if available
    if "research" in processing_result:
        sanitized_result["research"] = processing_result["research"]

    # Include clean attachment info
    if "attachments" in processing_result:
        attachments = _as_dict(processing_result["attachments"])
        sanitized_result["attachments"] = {
            "summary": attachments.get("summary"),
            "processed": attachments.get("processed", []),
        }

    # Include email content lengths for monitoring
    if "email_content" in processing_result:
        email_content = _as_dict(processing_result["email_content"])
        enhanced = _as_dict(email_content.get("enhanced"))
        sanitized_result["email_content_stats"] = {
            # `or ""` maps a missing/None/empty body to length 0
            "html_length": len(email_content.get("html") or ""),
            "text_length": len(email_content.get("text") or ""),
            "has_enhanced_content": bool(enhanced.get("html") or enhanced.get("text")),
        }

    return sanitized_result


@app.post("/process-email")
async def process_email(
    from_email: Annotated[str, Form()] = ...,
    to: Annotated[str, Form()] = ...,
    subject: Annotated[Optional[str], Form()] = "",
    textContent: Annotated[Optional[str], Form()] = "",
    htmlContent: Annotated[Optional[str], Form()] = "",
    messageId: Annotated[Optional[str], Form()] = None,
    date: Annotated[Optional[str], Form()] = None,
    emailId: Annotated[Optional[str], Form()] = None,
    rawHeaders: Annotated[Optional[str], Form()] = None,
    files: Annotated[list[UploadFile] | None, File()] = None,
    api_key: str = Depends(api_auth_scheme),
):
    """
    Validate an incoming email, store its attachments and queue it for processing

    The request is validated (API key, whitelist, handle, attachments), attachments
    are saved to disk when the handle's instructions ask for it, and the email is
    handed to ``process_email_task``. The reply itself is not produced here.

    Args:
        from_email (str): Sender's email address
        to (str): Recipient's email address
        subject (str): Subject of the email
        textContent (str): Plain text content of the email
        htmlContent (str): HTML content of the email
        messageId (str): Unique identifier for the email message
        date (str): Date when the email was sent
        emailId (str): Unique identifier for the email in the system
        rawHeaders (str): Raw headers of the email as a JSON object; malformed or
            non-object values are ignored with a warning
        files (list[UploadFile] | None): List of uploaded files as attachments
        api_key (str): API key for authentication

    Returns:
        Response: FastAPI Response object with JSON content. A validator's own response is
        returned as-is when validation fails; unexpected errors produce a 500 response.

    Raises:
        HTTPException: Propagated unchanged (e.g. a 400 raised while saving attachments)
    """
    # Validate API key
    if response := await validate_api_key(api_key):
        return response

    if files is None:
        files = []
    parsed_headers: dict[str, Any] = {}
    # Initialised before the try block so the error handler below can always
    # clean up and report without inspecting locals()
    email_attachments_dir = ""
    attachment_info: list[dict[str, Any]] = []
    try:
        # Parse raw headers if provided
        if rawHeaders:
            try:
                decoded_headers = json.loads(rawHeaders)
            except json.JSONDecodeError:
                logger.warning(f"Could not parse rawHeaders JSON: {rawHeaders}")
                # Continue processing even if headers are malformed
            else:
                if isinstance(decoded_headers, dict):
                    parsed_headers = decoded_headers
                    logger.info(f"Received raw headers: {json.dumps(parsed_headers, indent=2)}")
                else:
                    # Valid JSON but not an object (list, string, number...): the header
                    # lookups below need a mapping, so treat it like malformed input
                    logger.warning(
                        f"Ignoring rawHeaders: expected a JSON object, got {type(decoded_headers).__name__}"
                    )

        # Validate email whitelist
        if response := await validate_email_whitelist(from_email, to, subject, messageId):
            return response

        # Validate email handle
        response, handle = await validate_email_handle(to, from_email, subject, messageId)
        if response:
            return response

        # Convert uploaded files to dictionaries for validation.
        # Each file is read one at a time here so that only one payload is held in
        # memory while the request can still be rejected.
        attachments_for_validation = []
        for file in files:
            content = await file.read()
            attachments_for_validation.append(
                {"filename": file.filename, "contentType": file.content_type, "size": len(content)}
            )
            await file.seek(0)  # Reset file pointer for later use

        # Validate attachments
        if response := await validate_attachments(attachments_for_validation, from_email, to, subject, messageId):
            return response

        # Convert validated files to EmailAttachment objects
        attachments = []
        for file in files:
            content = await file.read()
            attachments.append(
                EmailAttachment(
                    filename=file.filename,
                    contentType=file.content_type,
                    content=content,  # Store binary content directly
                    size=len(content),
                    path=None,  # Path will be set after saving to disk
                )
            )
            logger.info(f"Received attachment: {file.filename} (type: {file.content_type}, size: {len(content)} bytes)")
            await file.seek(0)  # Reset file pointer for later use

        # Get handle configuration
        email_instructions = processing_instructions_resolver(handle)  # Safe to use direct access now

        # Log initial email details
        logger.info("Received new email request:")
        logger.info(f"To: {to} (handle: {handle})")
        logger.info(f"Subject: {subject}")
        logger.info(f"Message ID: {messageId}")
        logger.info(f"Date: {date}")
        logger.info(f"Email ID: {emailId}")
        logger.info(f"Number of attachments: {len(files)}")
        # Log raw headers count if present
        if parsed_headers:
            logger.info(f"Number of raw headers received: {len(parsed_headers)}")

        # Parse CC addresses from raw headers
        cc_list = []
        raw_cc_header = parsed_headers.get("cc", "")
        if isinstance(raw_cc_header, str) and raw_cc_header:
            try:
                # Use getaddresses to handle names and comma separation
                addresses = getaddresses([raw_cc_header])
                cc_list = [addr for name, addr in addresses if addr]
                if cc_list:
                    logger.info(f"Parsed CC list: {cc_list}")
            except Exception as e:
                logger.warning(f"Could not parse CC header '{raw_cc_header}': {e!s}")

        # Create EmailRequest instance
        email_request = EmailRequest(
            from_email=from_email,
            to=to,
            subject=subject,
            textContent=textContent,
            htmlContent=htmlContent,
            messageId=messageId,
            date=date,
            emailId=emailId,
            rawHeaders=parsed_headers,
            cc=cc_list,
            attachments=[],  # Start with empty list, will be updated after saving files
        )

        # Generate email ID
        email_id = generate_email_id(email_request)
        logger.info(f"Generated email ID: {email_id}")

        # Handle attachments only if the handle requires it
        if email_instructions.process_attachments and attachments:
            email_attachments_dir, attachment_info = await handle_file_attachments(
                attachments, email_id, email_request
            )
            logger.info(f"Processed {len(attachment_info)} attachments successfully")
            logger.info(f"Attachments directory: {email_attachments_dir}")

        # Prepare attachment info for processing
        processed_attachment_info = []
        for info in attachment_info:
            processed_info = {
                "filename": info.get("filename", ""),
                "type": info.get("type", info.get("contentType", "application/octet-stream")),
                "path": info.get("path", ""),
                "size": info.get("size", 0),
            }
            processed_attachment_info.append(processed_info)
            logger.info(
                f"Prepared attachment for processing: {processed_info['filename']} "
                f"(type: {processed_info['type']}, size: {processed_info['size']} bytes)"
            )

        # Enqueue the task for async processing
        process_email_task.send(email_request.model_dump(), email_attachments_dir, processed_attachment_info)
        logger.info(f"Enqueued email {email_id} for processing with {len(processed_attachment_info)} attachments")

        # Return immediate success response
        return Response(
            content=json.dumps(
                {
                    "message": "Email received and queued for processing",
                    "email_id": email_id,
                    "attachments_saved": len(attachment_info),
                    "status": "processing",
                }
            ),
            status_code=status.HTTP_200_OK,
            media_type="application/json",
        )

    except HTTPException:
        # Re-raise HTTPException to maintain the correct status code
        raise
    except Exception as e:
        # Log the error and clean up
        logger.exception("Error processing email request")

        # Only set once attachments were actually written for this request
        if email_attachments_dir:
            cleanup_attachments(email_attachments_dir)

        # Return error response
        return Response(
            content=json.dumps(
                {
                    "message": "Error processing email request",
                    "error": str(e),
                    "attachments_saved": len(attachment_info),
                    "attachments_deleted": True,
                }
            ),
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            media_type="application/json",
        )


if __name__ == "__main__":
    # Run the server if this file is executed directly
    # (uvicorn is imported here so it is only needed for this script-style launch)
    import uvicorn

    # "api:app" is an import string: the `app` object of a module named `api`.
    # NOTE(review): this resolves only when the directory containing this file is
    # on the import path (as it is when the file is run as a script) — confirm.
    # NOTE(review): host 0.0.0.0 with reload=True looks like a development setup — confirm.
    uvicorn.run("api:app", host="0.0.0.0", port=8000, reload=True)