diff --git a/.env.example b/.env.example
index 694b6fc..2c3b4dd 100644
--- a/.env.example
+++ b/.env.example
@@ -1,5 +1,5 @@
# override this to your own model config toml
-LITELLM_CONFIG_PATH=model.config.toml
+LITELLM_CONFIG_PATH=./credentials/model.config.example.toml
# Azure OpenAI Configuration (Legacy)
AZURE_OPENAI_MODEL=your_model_name_here # e.g., o3-mini-deep-research
@@ -22,8 +22,10 @@ AWS_ACCESS_KEY_ID=your_aws_access_key_id
AWS_SECRET_ACCESS_KEY=your_aws_secret_access_key
SENDER_EMAIL=your_sender_email@domain.com
-# JINA API Key
+# External Services
JINA_API_KEY="YOUR_JINA_API_KEY" # Leave blank if not using deep research
+BRAVE_SEARCH_API_KEY=""
+RAPIDAPI_KEY=""
# LLM Routing Configuration
# GPT-4o-mini Instance 1
@@ -53,6 +55,7 @@ SUPABASE_URL=your_supabase_url
SUPABASE_KEY=your_supabase_key
SUPABASE_SERVICE_ROLE_KEY=your_supabase_service_role_key
WHITELIST_SIGNUP_URL=your_whitelist_signup_url # e.g., https://yourdomain.com/
+FRONTEND_URL=https://mxtoai.com/
# Server Configuration
PORT=8000
diff --git a/README.md b/README.md
index f1d40f4..29ae80d 100644
--- a/README.md
+++ b/README.md
@@ -96,10 +96,58 @@ poetry run python run_api.py
```
5. Start the workers:
+
+Using only a single process and a couple of threads for local development:
+
+```bash
+poetry run dramatiq mxtoai.tasks --processes 1 --threads 2 --watch ./.
+```
+
+### Docker Setup (Alternative Installation)
+
+The project can also be run using Docker Compose, which provides an isolated environment with all required services.
+
+1. Ensure you have Docker and Docker Compose installed on your system.
+
+2. Build and start all services:
```bash
-poetry run dramatiq mxtoai.tasks --watch ./.
+docker compose up -d
```
+3. Access the services:
+- API Server: http://localhost:8000
+- RabbitMQ Management: http://localhost:15672 (credentials: guest/guest)
+- Redis: localhost:6379
+- Ollama: localhost:11434 (optional)
+
+#### Service Details
+- **API Server**: FastAPI application running on port 8000
+- **Worker**: Background task processor using Dramatiq
+- **Redis**: Used for caching and session management
+- **RabbitMQ**: Message broker for task queue
+- **Ollama**: Optional LLM service (disabled by default)
+
+#### Running with Ollama
+To include the Ollama service (required for local LLM processing):
+```bash
+docker compose --profile ollama up -d
+```
+
+#### Stopping Services
+```bash
+# Stop all services
+docker compose down
+
+# Stop and remove all data volumes (this will delete all data)
+docker compose down -v
+```
+
+#### Important Notes
+- The Docker setup includes all required services (Redis, RabbitMQ) automatically
+- The model configuration file (`model.config.toml`) must be placed in the `credentials/` directory (it is bind-mounted into the containers)
+- All services are configured to restart automatically unless stopped manually
+- Data persistence is enabled for Redis, RabbitMQ, and Ollama through Docker volumes
+
### Environment Variables
Copy the `.env.example` file to `.env` and update with your specific configuration:
@@ -110,6 +158,46 @@ LITELLM_CONFIG_PATH=model.config.toml
# Redis configuration
REDIS_HOST=localhost
REDIS_PORT=6379
+REDIS_DB=0
+REDIS_PASSWORD=
+
+# rabbitmq config
+RABBITMQ_HOST=localhost
+RABBITMQ_PORT=5672
+RABBITMQ_USER=guest
+RABBITMQ_PASSWORD=guest
+RABBITMQ_VHOST=/
+RABBITMQ_HEARTBEAT=60 # Default heartbeat interval in seconds
+
+# server config
+PORT=8000
+HOST=0.0.0.0
+LOG_LEVEL=INFO
+IS_PROD=false
+X_API_KEY=your_api_key
+
+# supabase
+SUPABASE_URL=your_supabase_url
+SUPABASE_KEY=your_supabase_key
+SUPABASE_SERVICE_ROLE_KEY=your_supabase_service_role_key
+WHITELIST_SIGNUP_URL=your_whitelist_signup_url # e.g., https://yourdomain.com/
+
+# Azure OpenAI API key
+AZURE_OPENAI_API_KEY=your_api_key_here
+
+# Hugging Face Token
+HF_TOKEN=your_huggingface_token
+
+# AWS SES Configuration
+AWS_REGION=your_aws_region # e.g., ap-south-1
+AWS_ACCESS_KEY_ID=your_aws_access_key_id
+AWS_SECRET_ACCESS_KEY=your_aws_secret_access_key
+SENDER_EMAIL=your_sender_email@domain.com
+
+# External services
+JINA_API_KEY="YOUR_JINA_API_KEY" # Leave blank if not using deep research
+BRAVE_SEARCH_API_KEY=""
+RAPIDAPI_KEY=""
# Optional for research functionality
JINA_API_KEY=your-jina-api-key
@@ -122,7 +210,7 @@ AZURE_VISION_KEY=your-azure-vision-key
SERPAPI_API_KEY=your-serpapi-api-key
```
-This project supports load balancing and routing across multiple models, so you can define as many models as you'd like. Copy `model.config.example.toml` to a toml file and update it with your preferred configuration. Update `.env` with the path your toml relative to root.
+This project supports load balancing and routing across multiple models, so you can define as many models as you'd like. Copy `credentials/model.config.example.toml` to a new toml file in the same directory and update it with your preferred configuration. Update `.env` with the path to your toml file, relative to the project root.
A sample configuration looks like this:
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..17ae4c5
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,90 @@
+version: "3.9"
+
+services:
+  redis:
+    image: redis:7-alpine
+    container_name: redis
+    ports:
+      - ${REDIS_PORT:-6379}:${REDIS_PORT:-6379}
+    volumes:
+      - redis_data:/data
+    restart: unless-stopped
+    command: ["redis-server", "--requirepass", "${REDIS_PASSWORD:-changeme}"]
+
+  rabbitmq:
+    image: rabbitmq:3-management
+    container_name: rabbitmq
+    ports:
+      - ${RABBITMQ_PORT:-5672}:${RABBITMQ_PORT:-5672}
+      - ${RABBITMQ_MANAGEMENT_PORT:-15672}:${RABBITMQ_MANAGEMENT_PORT:-15672}
+    volumes:
+      - rabbitmq_data:/var/lib/rabbitmq
+    environment:
+      RABBITMQ_DEFAULT_USER: ${RABBITMQ_USER:-guest}
+      RABBITMQ_DEFAULT_PASS: ${RABBITMQ_PASSWORD:-guest}
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "rabbitmq-diagnostics", "ping"]
+      interval: 30s
+      timeout: 10s
+      retries: 5
+
+  # Optional local LLM runtime; only started with `--profile ollama`
+  ollama:
+    image: ollama/ollama:latest
+    container_name: ollama
+    ports:
+      - ${OLLAMA_PORT:-11434}:${OLLAMA_PORT:-11434}
+    volumes:
+      - ollama_data:/root/.ollama
+    restart: unless-stopped
+    profiles:
+      - ollama
+
+  api_server:
+    build:
+      context: .
+      dockerfile: docker/api_server.dockerfile
+    container_name: api_server
+    env_file:
+      - .env
+    depends_on:
+      rabbitmq:
+        condition: service_healthy
+      redis:
+        condition: service_started
+    ports:
+      - "8000:8000"
+    environment:
+      # Service-internal URLs override any host-oriented values from .env
+      - REDIS_URL=redis://:${REDIS_PASSWORD:-changeme}@redis:${REDIS_PORT:-6379}/0
+      - RABBITMQ_URL=amqp://${RABBITMQ_USER:-guest}:${RABBITMQ_PASSWORD:-guest}@rabbitmq:${RABBITMQ_PORT:-5672}/
+      - OLLAMA_URL=http://ollama:${OLLAMA_PORT:-11434}
+      - LITELLM_CONFIG_PATH=/app/credentials/model.config.toml
+    volumes:
+      # Host-provided model config; must exist before `docker compose up`
+      - ./credentials/model.config.toml:/app/credentials/model.config.toml
+    command: ["poetry", "run", "uvicorn", "mxtoai.api:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "4"]
+
+  worker:
+    build:
+      context: .
+      dockerfile: docker/worker.dockerfile
+    container_name: worker
+    env_file:
+      - .env
+    depends_on:
+      rabbitmq:
+        condition: service_healthy
+      redis:
+        condition: service_started
+    environment:
+      # Service-internal URLs override any host-oriented values from .env
+      - REDIS_URL=redis://:${REDIS_PASSWORD:-changeme}@redis:${REDIS_PORT:-6379}/0
+      - RABBITMQ_URL=amqp://${RABBITMQ_USER:-guest}:${RABBITMQ_PASSWORD:-guest}@rabbitmq:${RABBITMQ_PORT:-5672}/
+      - OLLAMA_URL=http://ollama:${OLLAMA_PORT:-11434}
+      - LITELLM_CONFIG_PATH=/app/credentials/model.config.toml
+    volumes:
+      - ./credentials/model.config.toml:/app/credentials/model.config.toml
+    command: ["poetry", "run", "dramatiq", "mxtoai.tasks", "--watch", "./."]
+
+volumes:
+  rabbitmq_data:
+  ollama_data:
+  redis_data:
diff --git a/docker/api_server.dockerfile b/docker/api_server.dockerfile
new file mode 100644
index 0000000..90c80a4
--- /dev/null
+++ b/docker/api_server.dockerfile
@@ -0,0 +1,28 @@
+FROM python:3.13-slim-bookworm
+
+# System dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ curl \
+ build-essential \
+ ffmpeg \
+ && rm -rf /var/lib/apt/lists/*
+
+# Set working directory
+WORKDIR /app
+
+# Install Poetry (latest)
+RUN curl -sSL https://install.python-poetry.org | python3 - && \
+ ln -s /root/.local/bin/poetry /usr/local/bin/poetry
+
+# Copy dependency files first (for cache)
+COPY pyproject.toml poetry.lock ./
+
+# Install dependencies (no virtualenv)
+RUN poetry config virtualenvs.create false && poetry install --no-root --no-interaction --no-ansi
+
+# Copy only the relevant application code
+COPY mxtoai ./mxtoai
+COPY run_api.py .
+
+# Run the API via uvicorn
+CMD ["poetry", "run", "uvicorn", "mxtoai.api:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "4"]
diff --git a/docker/worker.dockerfile b/docker/worker.dockerfile
new file mode 100644
index 0000000..a2caca4
--- /dev/null
+++ b/docker/worker.dockerfile
@@ -0,0 +1,27 @@
+FROM python:3.13-slim-bookworm
+
+# System dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ curl \
+ build-essential \
+ ffmpeg \
+ && rm -rf /var/lib/apt/lists/*
+
+# Set working directory
+WORKDIR /app
+
+# Install Poetry (latest)
+RUN curl -sSL https://install.python-poetry.org | python3 - && \
+ ln -s /root/.local/bin/poetry /usr/local/bin/poetry
+
+# Copy dependency files first (for cache)
+COPY pyproject.toml poetry.lock ./
+
+# Install dependencies
+RUN poetry config virtualenvs.create false && poetry install --no-root --no-interaction --no-ansi
+
+# Copy only the relevant worker code
+COPY mxtoai ./mxtoai
+
+# Run the Dramatiq worker
+CMD ["poetry", "run", "dramatiq", "mxtoai.tasks", "--watch", "./mxtoai"]
diff --git a/docusaurus-site/docs/introduction.md b/docusaurus-site/docs/introduction.md
index 8d52f4a..e8718bb 100644
--- a/docusaurus-site/docs/introduction.md
+++ b/docusaurus-site/docs/introduction.md
@@ -8,7 +8,7 @@ Welcome to MXtoAI!
MXtoAI is an intelligent email assistant that processes users emails automatically. Users simply forward any email to our specialized handles (email IDs) and the appropriate actions are taken. Most of the actions are AI driven.
-Forward emails to our general email handle `ask@mxtoai.com` or to one of our specialized handles. Get summaries, replies, and research without the complexity.
+Forward emails to our general email handle `ask@mxtoai.com` or to one of our specialized handles. Get summaries, replies, research, and professional PDF exports without the complexity.

*Example: Forwarding an email to ask@mxtoai.com with instructions.*
diff --git a/docusaurus-site/docs/usage.md b/docusaurus-site/docs/usage.md
index 7051fd2..63eb173 100644
--- a/docusaurus-site/docs/usage.md
+++ b/docusaurus-site/docs/usage.md
@@ -18,7 +18,10 @@ Here's a brief overview of the available handles and their primary functions:
* `background@mxtoai.com`: Get background information on entities mentioned.
* `ask@mxtoai.com`: Ask specific questions about the email content.
* `schedule@mxtoai.com`: Extract scheduling information or propose meeting times.
+* `pdf@mxtoai.com`: Export email content as a professional PDF document.
For detailed information on each handle, including aliases and key features, please refer to the [Email Handles section on our website](https://www.mxtoai.com/#usecases).
**Pro tip:** Add our email handles to your contacts for quicker access when forwarding emails.
+
+**Note:** Any handle can export content as PDF by simply asking "convert to PDF" or "export as PDF" in your request.
diff --git a/docusaurus-site/docs/why-use-mxtoai.md b/docusaurus-site/docs/why-use-mxtoai.md
index f324276..7664bcc 100644
--- a/docusaurus-site/docs/why-use-mxtoai.md
+++ b/docusaurus-site/docs/why-use-mxtoai.md
@@ -8,8 +8,9 @@
- Works anywhere - Gmail, Outlook, Apple Mail, even your phone
- Works with attachments - PDFs, images, spreadsheets all processed
- Works with email threads - understands context from conversation history
+- **PDF Export** - any response can be converted to PDF, or use pdf@ to export content directly
-As simple as it sounds, MXtoAI is very powerful. If you spend more than 10 minutes a day reading, understanding, or responding to emails, MXtoAI can probably save you time and mental energy. Whether you're dealing with work emails, personal correspondence, or staying informed, there's likely a use case that fits your daily routine. Here are some use-cases you can use it for.
+As simple as it sounds, MXtoAI is very powerful. If you spend more than 10 minutes a day reading, understanding, or responding to emails, MXtoAI can probably save you time and mental energy. Whether you're dealing with work emails, personal correspondence, or staying informed, there's likely a use case that fits your daily routine. Plus, any content generated can be exported as a professional PDF for sharing, printing, or archiving. Here are some use-cases you can use it for.
## 📧 Daily Email Management
@@ -70,6 +71,10 @@ As simple as it sounds, MXtoAI is very powerful. If you spend more than 10 minut
- Forward to summarize@mxtoai.com - get key requirements and deadlines
- Forward to ask@mxtoai.com with "What's our competitive advantage here?"
+**"I need to document compliance or regulatory communications"**
+- Forward regulatory emails to pdf@mxtoai.com for permanent, clean records
+- Perfect for audit trails and compliance documentation
+
## 🔍 Information Verification & Research
**"I get forwarded news and want to check if it's true"**
@@ -88,6 +93,20 @@ As simple as it sounds, MXtoAI is very powerful. If you spend more than 10 minut
- Forward competitor emails to background@mxtoai.com
- Get market position, recent moves, customer feedback
+## 📄 PDF Export & Document Creation
+
+**"I need to create professional documents from email content"**
+- Forward newsletters to pdf@mxtoai.com - get clean, formatted PDF without email headers
+- Perfect for sharing research, reports, or important communications
+
+**"I want to convert AI responses to shareable documents"**
+- Ask any handle to "generate a report and convert to PDF"
+- Forward to ask@mxtoai.com with "Create a HN newsletter with top AI posts of the week and export as PDF"
+
+**"I need to archive important email content professionally"**
+- Forward contracts, agreements, or important announcements to pdf@mxtoai.com
+- Get clean documents without email clutter - perfect for filing or legal records
+
## 🏠 Personal Life
**"I get complex medical/insurance emails"**
@@ -128,6 +147,10 @@ As simple as it sounds, MXtoAI is very powerful. If you spend more than 10 minut
- Forward to ask@mxtoai.com with "What are the practical applications?"
- Understand real-world relevance quickly
+**"I want to compile research findings into shareable documents"**
+- Forward multiple research emails to ask@mxtoai.com with "Synthesize findings and create PDF report"
+- Perfect for thesis research or professional development
+
## 🛍️ Shopping & Consumer Decisions
**"I get overwhelmed by product comparison emails"**
@@ -199,6 +222,7 @@ As simple as it sounds, MXtoAI is very powerful. If you spend more than 10 minut
**Consultant:**
- "I forward client emails to background@mxtoai.com before meetings to understand industry challenges"
- "I forward RFPs to ask@mxtoai.com with 'What's the hidden agenda here?' to win more bids"
+- "I forward research findings to ask@mxtoai.com with 'Create client presentation and export as PDF' for professional deliverables"
**Retiree:**
- "I forward Medicare emails to simplify@mxtoai.com - healthcare is confusing enough"
diff --git a/mxtoai/_logging.py b/mxtoai/_logging.py
index 50c88f4..5727e91 100644
--- a/mxtoai/_logging.py
+++ b/mxtoai/_logging.py
@@ -1,15 +1,17 @@
+import logging
import os
import sys
from collections.abc import Sequence
-from contextlib import contextmanager
+from contextlib import contextmanager, suppress
from pathlib import Path
from typing import Any
import logfire
from dotenv import load_dotenv
from loguru import logger
+from rich.console import Console
-__all__ = ["get_logger", "span"]
+__all__ = ["get_logger", "get_smolagents_console", "span"]
# Load environment variables
load_dotenv()
@@ -69,11 +71,134 @@
logger.add(**logfire_handler)
logfire.configure(console=False)
+
+class InterceptHandler(logging.Handler):
+ """
+ Intercept standard library logging and redirect to loguru.
+ This captures logs from third-party libraries like LiteLLM, httpx, etc.
+ """
+
+ def emit(self, record):
+ # Get corresponding Loguru level if it exists
+ try:
+ level = logger.level(record.levelname).name
+ except ValueError:
+ level = record.levelno
+
+ # Find caller from where originated the logged message
+ frame, depth = logging.currentframe(), 2
+ while frame and (frame.f_code.co_filename in (logging.__file__, __file__)):
+ frame = frame.f_back
+ depth += 1
+
+ # Use the logger name from the original record for better identification
+ logger_name = record.name if record.name else "unknown"
+
+ # Get the formatted message
+ try:
+ message = record.getMessage()
+ except Exception:
+ message = str(record.msg)
+
+ # Log through loguru with proper context
+ logger.opt(depth=depth, exception=record.exc_info).bind(name=logger_name).log(level, message)
+
+
+# Intercept standard library logging and redirect to loguru
+def setup_stdlib_logging_intercept():
+ """Set up interception of standard library logging."""
+ # Create our intercept handler
+ intercept_handler = InterceptHandler()
+
+ # Configure root logger
+ logging.root.handlers = [intercept_handler]
+ logging.root.setLevel(LOG_LEVEL)
+
+ # Also configure specific loggers that might be problematic
+ third_party_loggers = [
+ "litellm",
+ "httpx",
+ "dramatiq",
+ # "pika", # RabbitMQ client
+ "azure",
+ "openai",
+ "smolagents", # Capture smolagents verbose output
+ "transformers", # HuggingFace transformers
+ "torch", # PyTorch logging
+ "requests", # HTTP requests logging
+ "urllib3", # HTTP library used by requests
+ "aiohttp", # Async HTTP client
+ ]
+
+ for logger_name in third_party_loggers:
+ third_party_logger = logging.getLogger(logger_name)
+ third_party_logger.handlers = [intercept_handler]
+ third_party_logger.setLevel(LOG_LEVEL)
+ third_party_logger.propagate = True
+
+
+# Set up the interception
+setup_stdlib_logging_intercept()
+
# Log a test message to verify logging is working
logger.info("Logging initialized with level: {}", LOG_LEVEL)
logger.debug("Debug logging is enabled")
+class LoguruRichConsole:
+ """
+ Custom Rich console that integrates with loguru.
+ Captures smolagents Rich console output and feeds it into the loguru logging pipeline,
+ which then goes to app.log, debug.log, and logfire for unified observability.
+ """
+
+ def __init__(self):
+ """Initialize the loguru-integrated Rich console."""
+ # Create a standard Rich console for terminal output
+ self.terminal_console = Console()
+ # Get loguru logger for capturing Rich output
+ self.rich_logger = logger.bind(source="smolagents_rich")
+
+ def print(self, *args, **kwargs):
+ """Print to terminal and capture in loguru logging pipeline."""
+ try:
+ # Print to terminal as normal
+ self.terminal_console.print(*args, **kwargs)
+
+ # Capture the content for loguru logging
+ # Convert Rich renderables to plain text for logging
+ content_parts = []
+ for arg in args:
+ if hasattr(arg, "__rich__") or hasattr(arg, "__rich_console__"):
+ # For Rich renderables, capture their string representation
+ content_parts.append(str(arg))
+ else:
+ content_parts.append(str(arg))
+
+ content = " ".join(content_parts)
+
+ # Determine log level based on style or content
+ log_level = "INFO" # Default level
+ style = kwargs.get("style", "")
+ if "error" in style.lower() or "red" in style.lower():
+ log_level = "ERROR"
+ elif "warning" in style.lower() or "yellow" in style.lower():
+ log_level = "WARNING"
+ elif "debug" in content.lower():
+ log_level = "DEBUG"
+
+ # Log to loguru (which feeds to app.log, debug.log, and logfire)
+ self.rich_logger.log(log_level, "Rich Console: {}", content)
+
+ except Exception as e:
+ # Fallback logging if Rich integration fails
+ error_msg = f"Rich console integration error: {e}"
+ logger.error(error_msg)
+ # Still try to print to terminal
+ with suppress(Exception):
+ self.terminal_console.print(*args, **kwargs)
+
+
def get_logger(source: str) -> Any:
"""Get a logger instance bound with the source name."""
return logger.bind(source=source)
@@ -107,3 +232,11 @@ def span(
else:
# Return a dummy context manager that does nothing
yield
+
+
+def get_smolagents_console() -> LoguruRichConsole:
+ """
+ Get a Rich console for smolagents that integrates with loguru.
+ This captures Rich console output and feeds it into the unified logging pipeline.
+ """
+ return LoguruRichConsole()
diff --git a/mxtoai/agents/email_agent.py b/mxtoai/agents/email_agent.py
index f0c115a..fdd2543 100644
--- a/mxtoai/agents/email_agent.py
+++ b/mxtoai/agents/email_agent.py
@@ -11,17 +11,14 @@
# Add imports for the new default tools
from smolagents.default_tools import (
- GoogleSearchTool,
PythonInterpreterTool,
VisitWebpageTool,
- WebSearchTool,
WikipediaSearchTool,
)
-from mxtoai._logging import get_logger
+from mxtoai._logging import get_logger, get_smolagents_console
from mxtoai.models import ProcessingInstructions
from mxtoai.prompts.base_prompts import (
- LIST_FORMATTING_REQUIREMENTS,
MARKDOWN_STYLE_GUIDE,
RESEARCH_GUIDELINES,
RESPONSE_GUIDELINES,
@@ -37,6 +34,7 @@
EmailContentDetails,
EmailRequest,
EmailSentStatus,
+ PDFExportResult,
ProcessedAttachmentDetail,
ProcessingError,
ProcessingMetadata,
@@ -45,10 +43,12 @@
from mxtoai.scripts.visual_qa import azure_visualizer
from mxtoai.tools.attachment_processing_tool import AttachmentProcessingTool
from mxtoai.tools.deep_research_tool import DeepResearchTool
+from mxtoai.tools.external_data.linkedin import initialize_linkedin_data_api_tool, initialize_linkedin_fresh_tool
+from mxtoai.tools.pdf_export_tool import PDFExportTool
from mxtoai.tools.schedule_tool import ScheduleTool
-# Import the refactored fallback search tool
-from mxtoai.tools.search_with_fallback_tool import SearchWithFallbackTool
+# Import the web search tools
+from mxtoai.tools.web_search import BraveSearchTool, DDGSearchTool, GoogleSearchTool
# Load environment variables
load_dotenv(override=True)
@@ -103,82 +103,106 @@ def __init__(
self.visit_webpage_tool = VisitWebpageTool()
self.python_tool = PythonInterpreterTool(authorized_imports=ALLOWED_PYTHON_IMPORTS)
self.wikipedia_search_tool = WikipediaSearchTool()
+ self.pdf_export_tool = PDFExportTool()
- # Initialize complex tools using helper methods
- self.search_with_fallback_tool = self._initialize_search_tools()
+ # Initialize independent search tools
+ self.search_tools = self._initialize_independent_search_tools()
self.research_tool = self._initialize_deep_research_tool(enable_deep_research)
self.available_tools: list[Tool] = [
self.attachment_tool,
self.schedule_tool,
self.visit_webpage_tool,
- self.search_with_fallback_tool,
self.python_tool,
self.wikipedia_search_tool,
+ self.pdf_export_tool,
azure_visualizer,
]
+
+ # Add all available search tools
+ self.available_tools.extend(self.search_tools)
+
if self.research_tool:
self.available_tools.append(self.research_tool)
+ linkedin_fresh_tool = initialize_linkedin_fresh_tool()
+ if linkedin_fresh_tool:
+ self.available_tools.append(linkedin_fresh_tool)
+
+ linkedin_data_api_tool = initialize_linkedin_data_api_tool()
+ if linkedin_data_api_tool:
+ self.available_tools.append(linkedin_data_api_tool)
+
logger.info(f"Agent tools initialized: {[tool.name for tool in self.available_tools]}")
self._init_agent()
logger.info("Email agent initialized successfully")
def _init_agent(self):
- """
- Initialize the ToolCallingAgent with Azure OpenAI.
- """
+ """Initialize the smolagents ToolCallingAgent."""
# Initialize the routed model with the default model group
self.routed_model = RoutedLiteLLMModel()
+ # Create agent
self.agent = ToolCallingAgent(
model=self.routed_model,
tools=self.available_tools,
max_steps=12,
- verbosity_level=2,
+ verbosity_level=2, # Increased back to 2 to capture detailed Rich console output
planning_interval=4,
name="email_processing_agent",
description="An agent that processes emails, generates summaries, replies, and conducts research with advanced capabilities including web search, web browsing, and code execution.",
provide_run_summary=True,
)
- logger.debug("Agent initialized with routed model configuration")
- def _initialize_search_tools(self) -> SearchWithFallbackTool:
- """
- Initializes and configures the search tools, returning the SearchWithFallbackTool.
+ # Set up integrated Rich console that feeds into loguru/logfire pipeline
+ # This captures smolagents verbose output and integrates it with our unified logging
+ smolagents_console = get_smolagents_console()
- Returns:
- SearchWithFallbackTool: The configured search tool with Bing and DuckDuckGo as primary engines and Google as fallback.
+ # Override agent's console with our loguru-integrated console
+ if hasattr(self.agent, "logger") and hasattr(self.agent.logger, "console"):
+ self.agent.logger.console = smolagents_console
+ if (
+ hasattr(self.agent, "monitor")
+ and hasattr(self.agent.monitor, "logger")
+ and hasattr(self.agent.monitor.logger, "console")
+ ):
+ self.agent.monitor.logger.console = smolagents_console
- """
- bing_search_tool = WebSearchTool(engine="bing", max_results=5)
- logger.debug("Initialized WebSearchTool with Bing engine.")
+ logger.debug("Agent initialized with routed model configuration and loguru-integrated Rich console")
- ddg_search_tool = WebSearchTool(engine="duckduckgo", max_results=5)
- logger.debug("Initialized WebSearchTool with DuckDuckGo engine.")
-
- google_search_fallback_tool = self._initialize_google_search_tool()
-
- primary_search_engines: list[Tool] = []
- # Ensure tools are only added if successfully initialized (though WebSearchTool constructor doesn't typically fail here)
- if bing_search_tool: # bing_search_tool is always initialized
- primary_search_engines.append(bing_search_tool)
- if ddg_search_tool: # ddg_search_tool is always initialized
- primary_search_engines.append(ddg_search_tool)
+ def _initialize_independent_search_tools(self) -> list[Tool]:
+ """
+ Initialize independent search tools for DDG, Brave, and Google.
+ The agent will be able to choose which search engine to use based on cost and quality needs.
- if not primary_search_engines: # Should not happen with current WebSearchTool, but good practice
- logger.warning(
- "No primary search engines (Bing, DuckDuckGo) could be initialized for SearchWithFallbackTool."
- )
+ Returns:
+ list[Tool]: List of available search tools.
+ """
+ search_tools = []
+
+ # DDG Search - Always available (free)
+ ddg_tool = DDGSearchTool(max_results=10)
+ search_tools.append(ddg_tool)
+ logger.debug("Initialized DDG search tool (free, first choice)")
+
+ # Brave Search - Available if API key is configured
+ if os.getenv("BRAVE_SEARCH_API_KEY"):
+ brave_tool = BraveSearchTool(max_results=5)
+ search_tools.append(brave_tool)
+ logger.debug("Initialized Brave search tool (moderate cost, better quality)")
+ else:
+ logger.warning("BRAVE_SEARCH_API_KEY not found. Brave search tool not initialized.")
- search_tool = SearchWithFallbackTool(
- primary_search_tools=primary_search_engines, fallback_search_tool=google_search_fallback_tool
- )
+ # Google Search - Available if API keys are configured
+ if os.getenv("SERPAPI_API_KEY") or os.getenv("SERPER_API_KEY"):
+ google_tool = GoogleSearchTool()
+ search_tools.append(google_tool)
+ logger.debug("Initialized Google search tool (premium cost, highest quality)")
+ else:
+ logger.warning("No Google Search API keys found. Google search tool not initialized.")
- primary_names = [getattr(p, "engine", "UnknownEngine") for p in primary_search_engines]
- fallback_name = getattr(google_search_fallback_tool, "name", "None") if google_search_fallback_tool else "None"
- logger.info(f"Initialized SearchWithFallbackTool. Primary engines: {primary_names}, Fallback: {fallback_name}")
- return search_tool
+ logger.info(f"Initialized {len(search_tools)} independent search tools: {[tool.name for tool in search_tools]}")
+ return search_tools
def _get_required_actions(self, mode: str) -> list[str]:
"""
@@ -200,35 +224,6 @@ def _get_required_actions(self, mode: str) -> list[str]:
actions.append("Conduct research")
return actions
- def _initialize_google_search_tool(self) -> Optional[GoogleSearchTool]:
- """
- Initialize Google search tool with either SerpAPI or Serper provider.
-
- Returns:
- Optional[GoogleSearchTool]: Initialized GoogleSearchTool instance or None if initialization fails
-
- """
- if os.getenv("SERPAPI_API_KEY"):
- try:
- tool = GoogleSearchTool(provider="serpapi")
- logger.debug("Initialized GoogleSearchTool with SerpAPI for fallback.")
- return tool
- except ValueError as e:
- logger.warning(f"Failed to initialize GoogleSearchTool with SerpAPI for fallback: {e}")
- elif os.getenv("SERPER_API_KEY"):
- try:
- tool = GoogleSearchTool(provider="serper")
- logger.debug("Initialized GoogleSearchTool with Serper for fallback.")
- return tool
- except ValueError as e:
- logger.warning(f"Failed to initialize GoogleSearchTool with Serper for fallback: {e}")
- else:
- logger.warning(
- "GoogleSearchTool (for fallback) not initialized. Missing SERPAPI_API_KEY or SERPER_API_KEY."
- )
-
- return None
-
def _initialize_deep_research_tool(self, enable_deep_research: bool) -> Optional[DeepResearchTool]:
"""
Initializes the DeepResearchTool if API key is available.
@@ -378,7 +373,7 @@ def _create_task_template(
output_template,
RESPONSE_GUIDELINES,
MARKDOWN_STYLE_GUIDE,
- LIST_FORMATTING_REQUIREMENTS,
+ # LIST_FORMATTING_REQUIREMENTS,
]
return "\n\n".join(filter(None, sections))
@@ -399,6 +394,8 @@ def _process_agent_result(
research_output_findings: Union[str, None] = None
research_output_metadata: Union[AgentResearchMetadata, None] = None
+ pdf_export_result: Union[PDFExportResult, None] = None
+
final_answer_from_llm: Union[str, None] = None
email_text_content: Union[str, None] = None
email_html_content: Union[str, None] = None
@@ -425,7 +422,12 @@ def _process_agent_result(
tool_output = action_out if action_out is not None else obs_out
if tool_name and tool_output is not None:
- needs_parsing = tool_name in ["schedule_generator", "attachment_processor", "deep_research"]
+ needs_parsing = tool_name in [
+ "schedule_generator",
+ "attachment_processor",
+ "deep_research",
+ "pdf_export",
+ ]
if isinstance(tool_output, str) and needs_parsing:
try:
tool_output = ast.literal_eval(tool_output)
@@ -491,6 +493,27 @@ def _process_agent_result(
else:
error_msg = tool_output.get("message", "Schedule generator failed or missing ICS content.")
errors_list.append(ProcessingError(message="Schedule Tool Error", details=error_msg))
+
+ elif tool_name == "pdf_export" and isinstance(tool_output, dict):
+ if tool_output.get("success"):
+ pdf_export_result = PDFExportResult(
+ filename=tool_output.get("filename", "document.pdf"),
+ file_path=tool_output.get("file_path", ""),
+ file_size=tool_output.get("file_size", 0),
+ title=tool_output.get("title", "Document"),
+ pages_estimated=tool_output.get("pages_estimated", 1),
+ mimetype=tool_output.get("mimetype", "application/pdf"),
+ temp_dir=tool_output.get("temp_dir"),
+ )
+ logger.info(f"PDF export successful: {pdf_export_result.filename}")
+ else:
+ error_msg = tool_output.get("error", "PDF export failed")
+ details = tool_output.get("details", "")
+ errors_list.append(
+ ProcessingError(message="PDF Export Error", details=f"{error_msg}. {details}")
+ )
+ logger.error(f"PDF export failed: {error_msg}")
+
else:
logger.debug(
f"[Memory Step {i + 1}] Tool '{tool_name}' output processed (no specific handler). Output: {str(tool_output)[:200]}..."
@@ -592,6 +615,7 @@ def _process_agent_result(
)
if research_output_findings or research_output_metadata
else None,
+ pdf_export=pdf_export_result,
)
except Exception as e:
@@ -649,6 +673,7 @@ def _process_agent_result(
)
if research_output_findings or research_output_metadata
else None,
+ pdf_export=pdf_export_result,
)
def process_email(
@@ -722,4 +747,5 @@ def process_email(
attachments=AttachmentsProcessingResult(processed=[]),
calendar_data=None,
research=None,
+ pdf_export=None,
)
diff --git a/mxtoai/api.py b/mxtoai/api.py
index be1f8d7..be06f9e 100644
--- a/mxtoai/api.py
+++ b/mxtoai/api.py
@@ -13,6 +13,7 @@
from fastapi import Depends, FastAPI, File, Form, HTTPException, Response, UploadFile, status
from fastapi.security import APIKeyHeader
+from mxtoai import validators
from mxtoai._logging import get_logger
from mxtoai.agents.email_agent import EmailAgent
from mxtoai.config import ATTACHMENTS_DIR, SKIP_EMAIL_DELIVERY
@@ -33,7 +34,6 @@
validate_email_whitelist,
validate_rate_limits,
)
-from mxtoai import validators
# Load environment variables
load_dotenv()
@@ -467,6 +467,29 @@ async def process_email(
Response: FastAPI Response object with JSON content
"""
+ # Skip processing for AWS SES system emails
+ if from_email.endswith("@amazonses.com") or ".amazonses.com" in from_email:
+ logger.info(f"Skipping processing for AWS SES system email: {from_email} (subject: {subject})")
+ logger.info(f"AWS SES email content - Text: {textContent}")
+ logger.info(f"AWS SES email content - HTML: {htmlContent}")
+ if rawHeaders:
+ try:
+ parsed_headers = json.loads(rawHeaders)
+ logger.info(f"AWS SES email headers: {json.dumps(parsed_headers, indent=2)}")
+ except json.JSONDecodeError:
+ logger.warning(f"Could not parse AWS SES email headers: {rawHeaders}")
+ return Response(
+ content=json.dumps(
+ {
+ "message": "Skipped processing AWS SES system email",
+ "email": from_email,
+ "status": "skipped",
+ }
+ ),
+ status_code=status.HTTP_200_OK,
+ media_type="application/json",
+ )
+
# Validate API key
if response := await validate_api_key(api_key):
return response
diff --git a/mxtoai/email_handles.py b/mxtoai/email_handles.py
index f8a775e..9b029ec 100644
--- a/mxtoai/email_handles.py
+++ b/mxtoai/email_handles.py
@@ -18,7 +18,7 @@
process_attachments=True,
deep_research_mandatory=True,
add_summary=True,
- target_model="gpt-4-reasoning",
+ target_model="gpt-4",
task_template=template_prompts.RESEARCH_TEMPLATE,
output_template=output_prompts.RESEARCH_OUTPUT_GUIDELINES,
),
@@ -45,7 +45,7 @@
aliases=["factcheck", "verify"],
process_attachments=True,
deep_research_mandatory=False,
- target_model="gpt-4-reasoning",
+ target_model="gpt-4",
task_template=template_prompts.FACT_TEMPLATE,
output_template=output_prompts.FACT_CHECK_OUTPUT_GUIDELINES,
),
@@ -54,7 +54,7 @@
aliases=["background-check", "background"],
process_attachments=True,
deep_research_mandatory=False,
- target_model="gpt-4-reasoning",
+ target_model="gpt-4",
task_template=template_prompts.BACKGROUND_RESEARCH_TEMPLATE,
output_template=output_prompts.BACKGROUND_OUTPUT_GUIDELINES,
),
@@ -77,4 +77,13 @@
task_template=template_prompts.SCHEDULE_TEMPLATE,
output_template=output_prompts.SCHEDULE_OUTPUT_GUIDELINES,
),
+ ProcessingInstructions(
+ handle="pdf",
+ aliases=["export", "convert", "document", "export-pdf"],
+ process_attachments=True,
+ deep_research_mandatory=False,
+ target_model="gpt-4",
+ task_template=template_prompts.PDF_EXPORT_TEMPLATE,
+ output_template=output_prompts.PDF_EXPORT_OUTPUT_GUIDELINES,
+ ),
]
diff --git a/mxtoai/email_provider_domains.txt b/mxtoai/email_provider_domains.txt
index eefd17b..4e0afe2 100644
--- a/mxtoai/email_provider_domains.txt
+++ b/mxtoai/email_provider_domains.txt
@@ -2172,7 +2172,7 @@ justicemail.com
justmail.de
justmailz.com
justmarriedmail.com
-jwspamspy
+jwspamspy
k.ro
kaazoo.com
kabissa.org
@@ -6101,4 +6101,4 @@ zybermail.com
zydecofan.com
zzn.com
zzom.co.uk
-zzz.com
\ No newline at end of file
+zzz.com
diff --git a/mxtoai/prompts/base_prompts.py b/mxtoai/prompts/base_prompts.py
index 088066f..aa9484b 100644
--- a/mxtoai/prompts/base_prompts.py
+++ b/mxtoai/prompts/base_prompts.py
@@ -6,7 +6,7 @@
MARKDOWN FORMATTING REQUIREMENTS:
- **bold** for emphasis
- _italics_ for quotes
-- ### for section headers (if needed)
+- Strictly use `###` for section headers
- Proper bullet points and numbered lists
- Clear paragraph spacing
"""
@@ -18,32 +18,42 @@
- Include only relevant information
- Maintain appropriate tone and style
- Use proper spacing and formatting
-- ALWAYS Indent each nested level with two spaces
+- Try to maintain visual hierarchy of the response using section headers and lists
+- NEVER add numbers to section headers
- DO NOT add any signature - it will be added automatically
+- If web search tools were used, create a 'References' section at the end of your response. List the titles and URLs of the web pages used, formatted as markdown links (e.g., `1. [Page Title](URL)`).
+
+SEARCH TOOL SELECTION GUIDELINES:
+- **ddg_search**: Use first for most queries (free and fast)
+- **brave_search**: Use when DDG results are insufficient or you need better quality/more comprehensive information (moderate API cost)
+- **google_search**: Use only when DDG and Brave are insufficient (premium API cost, highest quality)
+- Choose search tools based on the importance, complexity and quality of search results received
+- Whenever you use a search tool, keep track of the links you visited and later add them as references.
"""
# Formatting requirements for HTML conversion
-LIST_FORMATTING_REQUIREMENTS = """
-NESTED LIST OUTPUT FORMAT GUIDELINES (for Markdown to HTML conversion):
-
-1. Always begin with a **numbered list** (use `1.`).
-2. **Alternate between numbered and bullet lists** at each level of nesting:
- - Level 1: `1.`, `2.`, `3.` (numbered)
- 1. Level 2: `-` (bullet)
- - Level 3: `1.`, `2.`, `3.` (numbered)
- 1. Level 4: `-` (bullet)
- - And so on...
-3. Use **blank lines** between paragraphs and between different list levels.
-
-Example:
-
-1. Main point
- - Sub-point
- 1. Sub-sub-point
- - Sub-sub-sub-point
-
-All list sections **must follow this structure exactly**. Improper nesting or use of list styles will break the HTML conversion.
-"""
+# Not needed anymore, still keeping it for a while just in case
+# LIST_FORMATTING_REQUIREMENTS = """
+# NESTED LIST OUTPUT FORMAT GUIDELINES (for Markdown to HTML conversion):
+
+# 1. Always begin with a **numbered list** (use `1.`).
+# 2. **Alternate between numbered and bullet lists** at each level of nesting:
+# - Level 1: `1.`, `2.`, `3.` (numbered)
+# 1. Level 2: `-` (bullet)
+# - Level 3: `1.`, `2.`, `3.` (numbered)
+# 1. Level 4: `-` (bullet)
+# - And so on...
+# 3. Use **blank lines** between paragraphs and between different list levels.
+
+# Example:
+
+# 1. Main point
+# - Sub-point
+# 1. Sub-sub-point
+# - Sub-sub-sub-point
+
+# All list sections **must follow this structure exactly**. Improper nesting or use of list styles will break the HTML conversion.
+# """
# Research guidelines
RESEARCH_GUIDELINES = {
@@ -53,11 +63,13 @@
- Ensure comprehensive research before responding
- Include citations and sources in your response
- Synthesize findings with the email content
+- Use appropriate search tools based on cost/quality needs (ddg_search > brave_search > google_search)
""",
"optional": """
RESEARCH GUIDELINES:
-- Deep research is NOT allowed for this handle
- Only use basic tools and provided information
- Focus on addressing the direct content of the email
+- If web search is needed, start with ddg_search for cost-effectiveness
+- Escalate to brave_search or google_search only if better results are needed
""",
}
diff --git a/mxtoai/prompts/output_prompts.py b/mxtoai/prompts/output_prompts.py
index 06d4080..5436b39 100644
--- a/mxtoai/prompts/output_prompts.py
+++ b/mxtoai/prompts/output_prompts.py
@@ -22,6 +22,7 @@
3. Detailed Analysis: In-depth exploration with subheadings
4. Supporting Evidence: Data, quotes, statistics
5. References: Numbered citations with links when available
+6. Use a separate section for each of the items above.
"""
# Simplify handler output guidelines
@@ -37,7 +38,7 @@
# Ask handler output guidelines
ASK_OUTPUT_GUIDELINES = """
Output Format Guidelines:
-1. Begin with acknowledgment of the question
+1. Begin with an acknowledgment of the question at the top of the response, before starting any sections.
2. Structure response with clear sections
3. Use examples to illustrate complex points
4. Include actionable recommendations when applicable
@@ -47,11 +48,12 @@
# Fact-check handler output guidelines
FACT_CHECK_OUTPUT_GUIDELINES = """
Output Format Guidelines:
-1. Present each claim in this format:
+1. Present a short summary of the original email to set up the context.
+2. Present each claim in this format:
- **Claim**: [Original statement]
- **Status**: [Verified ✓ / Not verified ❌ / Partially verified ⚠️]
- **Evidence**: [Supporting information]
- - **Sources**: [Citations with links]
+ - **Sources**: [Citations with links, make sure the links are valid]
2. Use consistent status symbols throughout
"""
@@ -61,7 +63,7 @@
1. Start with executive summary of key findings
2. Organize information by entity (person, organization, domain)
3. Use tables for comparative information
-4. Flag any security concerns prominently
+4. Flag any concerns prominently
"""
# Translation handler output guidelines
@@ -84,3 +86,20 @@
- **Notes**: Any assumptions or clarifications
2. Format times consistently with timezone
"""
+
+# PDF Export handler output guidelines
+PDF_EXPORT_OUTPUT_GUIDELINES = """
+Output Format Guidelines:
+1. Begin with a brief confirmation of PDF generation
+2. Include document details:
+ - **PDF Title**: Clear, descriptive document name
+ - **Content Summary**: What was included in the export
+ - **File Size/Pages**: Approximate document metrics
+ - **Attachment Notice**: Confirmation that PDF is attached
+3. Content processing notes:
+ - What content was included/excluded and why
+ - Any assumptions made during processing
+ - Quality of source material for export
+4. Professional tone acknowledging the export request
+5. Keep response concise - let the PDF be the main deliverable
+"""
diff --git a/mxtoai/prompts/template_prompts.py b/mxtoai/prompts/template_prompts.py
index 042f8bb..34bf452 100644
--- a/mxtoai/prompts/template_prompts.py
+++ b/mxtoai/prompts/template_prompts.py
@@ -4,19 +4,71 @@
# Summarize email handler template
SUMMARIZE_TEMPLATE = """
-Provide a concise, direct summary of the key points from the email and attachments.
+Systematically analyze and summarize content from all available sources with clear structure and action focus.
-Content Guidelines:
-1. Get straight to the key points
-2. No redundant introductions
-3. Include only relevant information
-4. Keep it concise but complete
-5. Use a natural, conversational tone
-
-Remember:
-- If the user has specific intent, then focus on what the user asked about
-- Skip unnecessary formality
-- Ensure proper markdown formatting
+# Summarization Process
+
+## STEP 1: Content Analysis
+- **Process ALL sources**: Email content, attachments, embedded links, external references (if asked)
+- **Assess complexity**: Determine appropriate detail level (concise/detailed/executive summary)
+- **Identify priorities**: Key messages, action items, deadlines, stakeholder impact
+
+## STEP 2: Structured Summary Format
+```
+## Executive Summary
+[2-3 sentences capturing core message and significance]
+
+## Main Points
+[Organized breakdown of key information from all sources]
+
+## Action Items
+- [Specific actions required with deadlines]
+- [Responsible parties if mentioned]
+
+## Additional Context
+[Important background, implications, supporting details]
+```
+
+## STEP 3: Quality Standards
+- **Process all content sources** before summarizing
+- **Highlight action items** clearly
+- **Note any inaccessible content** transparently
+- **Match detail level** to content complexity
+- **Maintain context** while being concise
+
+**Example Output:**
+```
+## Executive Summary
+Q3 sales report shows 12% revenue growth with West region leading performance, requiring strategy review meeting.
+
+## Key Information
+- **From**: Sales Director Sarah Chen
+- **Topic**: Q3 2024 Sales Performance Review
+- **Urgency**: Standard quarterly review
+- **Stakeholders**: Management team, regional leads
+
+## Main Points
+**Sales Performance (from Excel attachment):**
+- Total revenue: $4.2M (12% increase from Q2)
+- West region: 23% growth, exceeding targets
+- Product line A: 18% growth, strongest performer
+- Customer acquisition: 156 new accounts
+
+## Action Items
+- Review West region strategies for replication
+- Address East region performance decline
+- Quarterly review meeting: Next Friday
+
+## Additional Context
+Strong Q3 performance driven by West region success and Product A growth. East region needs attention.
+```
+
+**Critical Requirements:**
+- Process ALL available content sources (email, attachments, links)
+- Structure information for easy scanning
+- Clearly identify action items and deadlines
+- Note any content processing limitations
+- Adapt detail level to content complexity
"""
# Research handler template
@@ -31,7 +83,7 @@
- ### Detailed Analysis
- ### Supporting Evidence
- ### References
-2. Include proper citations [1], [2], etc.
+2. Include proper citations [1], [2], etc. if the deep_research tool provides them. For web_search results, extract the title and URL for each source and list them under the 'References' section using markdown link format (e.g., 1. [Page Title](URL)).
3. Format tables using markdown table syntax
4. Use proper paragraph spacing
@@ -45,152 +97,1013 @@
# Simplify handler template
SIMPLIFY_TEMPLATE = """
-Explain the content in simple, easy-to-understand terms without technical jargon, like you're explaining to a 5-year-old.
+Transform complex content into clear, accessible explanations using simple language and relatable examples.
-Content Guidelines:
-1. Use simple language
-2. Avoid technical terms
-3. Give everyday examples
-4. Keep explanations short
-5. Use bullet points for clarity
+# Simplification Process
+
+## STEP 1: Complexity Assessment
+- **Identify complexity sources**: Technical jargon, abstract concepts, complex processes, dense information
+- **Determine target level**: General public understanding (assume no specialized knowledge)
+- **Preserve core truth**: Maintain essential accuracy while removing complexity
+
+## STEP 2: Simplification Strategy
+**Language Techniques:**
+- Replace technical terms with everyday language (if replacement is not possible, add a glossary at the end)
+- Break complex sentences into shorter, clearer ones
+- Use active voice and concrete examples
+- Add helpful analogies from familiar experiences
+
+**Structure Format:**
+```
+## The Simple Version
+[One clear sentence explaining the core concept]
+
+## What This Means
+[2-3 sentences expanding on the main idea]
+
+## Here's How It Works
+[Step-by-step breakdown in simple terms]
+
+## Think of It Like This
+[Relatable analogy or real-world example]
+
+## Why This Matters
+[Practical significance in everyday terms]
+
+## The Bottom Line
+[Key takeaway anyone can remember]
+```
+
+## STEP 3: Quality Check
+- Could a 12-year-old understand the main point?
+- Are technical terms explained or replaced?
+- Do analogies help rather than confuse?
+- Is the essential message preserved?
+
+**Requirements:**
+- Use simple, everyday language
+- Include helpful analogies and examples
+- Preserve accuracy while removing jargon
+- Make content accessible to general audiences
+- Maintain respectful tone (not condescending)
"""
# Ask handler template
ASK_TEMPLATE = """
-Provide a complete response addressing all aspects of the query.
+Execute custom tasks and workflows systematically with research, analysis, and professional presentation.
-Content Guidelines:
-1. Brief summary of understanding
-2. Detailed response
-3. Additional insights if relevant
-4. Next steps or recommendations
+# General Task Execution Process
+
+## STEP 1: Task Analysis & Planning
+- **Understand the request**: Break down what the user wants accomplished
+- **Identify components**: Research needs, data gathering, analysis, formatting requirements
+- **Determine approach**: What tools and steps are needed to complete this task
+- **Set quality standards**: How should the final output be structured and presented
+
+## STEP 2: Systematic Execution
+**Research & Data Gathering:**
+- Use web search for current information and trends
+- Visit relevant websites and sources
+- Process any attachments or provided materials (if needed)
+- Gather comprehensive data before analysis
+
+**Analysis & Curation:**
+- Filter and prioritize information based on relevance and quality
+- Identify key insights, patterns, or important details
+- Apply criteria for selection (trending, popularity, importance)
+- Add context and explanatory information
+
+**Content Creation:**
+- Structure information logically and professionally
+- Create engaging and informative content
+- Include proper citations and links
+- Format for easy reading and comprehension
+
+## STEP 3: Professional Presentation
+**Standard Output Structure:**
+```
+## [Task Title/Summary]
+[Brief overview of what was accomplished]
+
+## [Main Content Sections]
+[Organized, formatted content with clear headers]
+
+### [Subsections as needed]
+- [Bullet points, lists, or structured information]
+- [Include links, sources, and references]
+
+## Key Insights/Summary
+[Important takeaways or conclusions]
+
+## Sources & References
+[All sources used with proper attribution]
+```
+
+## STEP 4: Quality Standards
+- **Comprehensive research** using available tools
+- **Professional formatting** with clear structure
+- **Accurate information** with proper source attribution
+- **Engaging presentation** that's easy to read and understand
+- **Complete execution** of all requested components
+
+**Example Task: "Prepare a newsletter with top 10 trending HN posts"**
+
+```
+## Hacker News Top 10 Trending Posts Newsletter
+Daily digest of the most engaging discussions and innovations from the HN community.
+
+## Today's Top Trending Posts
+
+### 1. [Post Title](HN-link)
+**Summary**: Brief description of the post content and significance
+**Why It's Trending**: Key reasons for community engagement
+**Discussion Highlights**: Notable comments or insights from HN users
+**Relevance**: Why this matters to the tech community
+
+### 2. [Next Post Title](HN-link)
+[Same format structure]
+
+[Continue for all 10 posts]
+
+## Key Themes Today
+- [Pattern 1]: Multiple posts about [topic]
+- [Pattern 2]: Community interest in [area]
+- [Pattern 3]: Emerging trends in [field]
+
+## Community Insights
+Notable discussions, debates, or expert opinions from today's conversations.
+
+## Sources
+- Hacker News front page and trending algorithms
+- Individual post discussions and comment threads
+- Community engagement metrics and voting patterns
+```
+
+**Requirements:**
+- Execute any custom task or workflow systematically
+- Use all available tools for research and analysis
+- Present results professionally with proper structure
+- Include comprehensive sources and attribution
+- Adapt format and approach to specific task requirements
"""
# Fact-check handler template
FACT_TEMPLATE = """
-Validate and fact-check the content thoroughly. Use web search tool to find reliable sources alongside deep search tool.
-Do not use deep search directly, use web search and page visit tool, if you're not satisfied with results, then only try deep search.
+Systematically verify claims and statements with comprehensive source validation and transparent uncertainty handling.
-Response Requirements:
-1. Use proper markdown formatting:
- - **Claim**: for stating each claim
- - _Source_: for source citations
- - ✓ or ❌ for verification status
- - Bullet points for supporting evidence
- - [text](url) for reference links
-2. Structure each fact-check:
- - Original claim
- - Verification status
- - Supporting evidence
- - Source citations
-3. Use clear paragraph breaks between checks
+# Fact-Checking Methodology - SYSTEMATIC VERIFICATION PROCESS
-Content Guidelines:
-1. State each claim clearly
-2. Provide verification status
-3. Include supporting evidence
-4. Cite reliable sources
-5. Note any uncertainties
-6. Always give a disclaimer that sometimes links may be outdated or incorrect depending on age of the source
+## STEP 1: CLAIM EXTRACTION & CATEGORIZATION
+**Extract ALL verifiable claims from the content:**
+- **Factual Claims**: Statistics, dates, events, scientific facts
+- **Attribution Claims**: Quotes, statements attributed to people/organizations
+- **Causal Claims**: "X causes Y", "Due to X, Y happened"
+- **Comparative Claims**: Rankings, comparisons, "better/worse than"
+- **Current Status Claims**: Current prices, status, availability
+
+**Claim Prioritization:**
+- **High Priority**: Core claims central to the message
+- **Medium Priority**: Supporting details and context
+- **Low Priority**: Tangential or well-established facts
+
+## STEP 2: SYSTEMATIC VERIFICATION STRATEGY
+**Verification Hierarchy:**
+1. **Primary Sources**: Official websites, government data, organization statements
+2. **Academic Sources**: Peer-reviewed research, institutional studies, Wikipedia
+3. **Established News Sources**: Reuters, AP, BBC, established newspapers
+4. **Industry Sources**: Trade publications, industry reports
+5. **Secondary Analysis**: Expert commentary, analysis pieces
+
+**Search Strategy:**
+1. **Direct Claim Search**: Search exact claim or paraphrased version
+2. **Source Verification**: Search for original source of claimed information
+3. **Counter-Evidence Search**: Actively search for contradicting information
+4. **Recent Updates**: Check for more recent information that might contradict
+5. **Context Search**: Understand broader context around the claim
+
+## STEP 3: SOURCE QUALITY ASSESSMENT
+**Evaluate each source on:**
+
MXtoAI Assistant
-_Feel free to reply to this email to continue our conversation._ +Feel free to reply to this email to continue our conversation.
""" def _init_template_env(self): @@ -92,6 +93,9 @@ def format_report( # Remove any existing signatures content = self._remove_existing_signatures(content) + # Apply markdown fixes for all formats + content = self._fix_ai_markdown(content) + # Process citations and references before converting format # DISABLED: _process_citations was causing issues with already formatted markdown. # The DeepResearchTool now handles citation/reference formatting directly. @@ -203,8 +207,11 @@ def _to_plain_text(self, markdown: str) -> str: Plain text version """ + # Handle tables first - convert markdown tables to plain text format + text = self._convert_tables_to_plain_text(markdown) + # Remove heading markers but preserve citations - text = re.sub(r"^#+\s+", "", markdown, flags=re.MULTILINE) + text = re.sub(r"^#+\s+", "", text, flags=re.MULTILINE) # Remove bold markers text = re.sub(r"\*\*(.*?)\*\*", r"\1", text) text = re.sub(r"__(.*?)__", r"\1", text) @@ -224,64 +231,199 @@ def _to_plain_text(self, markdown: str) -> str: text = re.sub(r"\n{3,}", "\n\n", text) return text.strip() + def _convert_tables_to_plain_text(self, markdown: str) -> str: + """ + Convert markdown tables to readable plain text format. 
+ + Args: + markdown: Markdown content with tables + + Returns: + Markdown with tables converted to plain text + + """ + lines = markdown.split("\n") + result_lines = [] + i = 0 + + while i < len(lines): + line = lines[i].strip() + + # Check if this looks like a table header + if "|" in line and i + 1 < len(lines) and "|" in lines[i + 1] and "-" in lines[i + 1]: + # Found a table, process it + table_lines = [line] + i += 1 + + # Skip the separator line + i += 1 + + # Collect table rows + while i < len(lines) and "|" in lines[i].strip(): + table_lines.append(lines[i].strip()) + i += 1 + + # Convert table to plain text + plain_table = self._format_table_as_plain_text(table_lines) + result_lines.extend(plain_table) + result_lines.append("") # Add spacing after table + + continue + result_lines.append(lines[i]) + i += 1 + + return "\n".join(result_lines) + + def _format_table_as_plain_text(self, table_lines: list[str]) -> list[str]: + """ + Format a markdown table as readable plain text. + + Args: + table_lines: List of table lines (header + rows) + + Returns: + List of formatted plain text lines + + """ + if not table_lines: + return [] + + # Parse table data + rows = [] + for line in table_lines: + # Remove leading/trailing pipes and split + cells = [cell.strip() for cell in line.strip("|").split("|")] + rows.append(cells) + + if not rows: + return [] + + # Calculate column widths + max_cols = max(len(row) for row in rows) + col_widths = [] + + for col in range(max_cols): + max_width = 0 + for row in rows: + if col < len(row): + max_width = max(max_width, len(row[col])) + col_widths.append(max(max_width, 8)) # Minimum width of 8 + + # Format as plain text + result = [] + + for row_idx, row in enumerate(rows): + # Pad cells to column width with center alignment + formatted_cells = [] + for col in range(max_cols): + cell_content = row[col] if col < len(row) else "" + formatted_cells.append(cell_content.center(col_widths[col])) + + # Join with spacing + result.append(" 
".join(formatted_cells).rstrip()) + + # Add separator after header + if row_idx == 0: + separator_parts = [] + for width in col_widths: + separator_parts.append("-" * width) + result.append(" ".join(separator_parts)) + + return result + def _to_html(self, markdown_content: str, theme: str = "default") -> str: """ - Convert markdown to HTML using templates and themes. + Convert markdown to HTML using markdown2 for robust AI-generated content handling. Args: - markdown_content: Markdown content + markdown_content: Markdown content (already processed by _fix_ai_markdown) theme: Theme name to use Returns: HTML version """ - try: - import markdown as md_converter - from markdown.extensions.attr_list import AttrListExtension - from markdown.extensions.fenced_code import FencedCodeExtension - from markdown.extensions.nl2br import Nl2BrExtension - from markdown.extensions.sane_lists import SaneListExtension - from markdown.extensions.tables import TableExtension - from markdown.extensions.toc import TocExtension - - # Configure extensions with specific settings - extensions = [ - TableExtension(), # Support for tables - FencedCodeExtension(), # Support for fenced code blocks - SaneListExtension(), # Better list handling - Nl2BrExtension(), # Convert newlines to line breaks - TocExtension(permalink=False), # Table of contents support without permalinks - AttrListExtension(), # Support for attributes - ] - - # Convert markdown to HTML with configured extensions - html_content = md_converter.markdown( - markdown_content, - extensions=extensions, - extension_configs={ - # Explicitly disable footnotes if it's a default or separate extension - # 'markdown.extensions.footnotes': {'PLACE_MARKER': '!!!!FOOTNOTES!!!!'} - }, - output_format="html5", # Use html5 for better compatibility - ) + # Convert markdown to HTML with markdown2 (robust for AI content) + html_content = markdown2.markdown( + markdown_content, + extras=[ + "fenced-code-blocks", # Support for ```code``` blocks + 
"tables", # Support for tables + "strike", # Support for ~~strikethrough~~ + "cuddled-lists", # Better list handling (key for AI content!) + "header-ids", # Add IDs to headers + "markdown-in-html", # Allow markdown inside HTML + "breaks", # Handle line breaks better + ], + ) + + if self.template_env: + try: + theme_settings = self.themes.get(theme, self.themes["default"]) + template = self.template_env.get_template("email_template.html") + + return template.render(content=html_content, theme=theme_settings) + except Exception as e: + logger.error(f"Template rendering failed: {e}. Falling back to basic rendering.") + + # fallback + logger.info("Template environment not available. Using basic HTML rendering.") + return self._basic_html_render(html_content) + + def _fix_ai_markdown(self, content: str) -> str: + """ + Fix AI-generated markdown issues that markdown2 doesn't handle. + This function performs several cleaning steps in a single pass over the lines. + + Args: + content: Raw markdown content + + Returns: + Fixed markdown content + + """ + lines = content.split("\n") + result_lines = [] - if self.template_env: - try: - theme_settings = self.themes.get(theme, self.themes["default"]) - template = self.template_env.get_template("email_template.html") + for i, line in enumerate(lines): + # --- FIX 1: Ensure headers are separated by a blank line --- + if line.strip().startswith("#") and i > 0 and result_lines and result_lines[-1].strip() != "": + # Insert blank line before header + result_lines.append("") - return template.render(content=html_content, theme=theme_settings) - except Exception as e: - logger.error(f"Template rendering failed: {e}. 
Falling back to basic rendering.") + # --- FIX 2: Manually parse and fix bolded links in list items --- + if line.strip().startswith(("*", "-")) and "**[" in line and "](" in line and ")**" in line: + # This is a very specific pattern, so we can be confident in this replacement + # Replace **[text](url)** with [**text**](url) + line = re.sub(r"\*\*\[(.*?)\]\((.*?)\)\*\*", r"[**\1**](\2)", line) + + # --- FIX 3: Convert letter-based lists to numbers --- + # e.g., a. Item -> 1. Item + match = re.match(r"^(\s*)([a-z])\.\s+(.*)$", line) + if match: + indent, letter, text = match.groups() + number = ord(letter) - ord("a") + 1 + line = f"{indent}{number}. {text}" + + # --- FIX 4: Fix mixed list formatting --- + # e.g., - 1. Item -> 1. Item + line = re.sub(r"^(\s*)[*-]\s+(\d+\.\s+.*)", r"\1\2", line) + + # --- FIX 5: Fix missing spaces after list markers --- + # Skip lines that start with bold markers like "**Summary:**" + if not (line.strip().startswith("**") and ("**:" in line or line.strip().endswith("**"))): + # Check for missing spaces after list markers + match = re.match(r"^(\s*)(\d+\.|\*|-|\+)([^\s].*)", line) + if match: + indent, marker, rest_of_line = match.groups() + # It's a real list item, just missing a space + line = f"{indent}{marker} {rest_of_line.lstrip()}" + + result_lines.append(line) + + return "\n".join(result_lines) - # fallback - logger.info("Template environment not available. 
Using basic HTML rendering.") - return self._basic_html_render(html_content, theme) - except ImportError: - logger.error("Markdown package not available - this should never happen as it's a required dependency") - raise # We should always have markdown package available def _basic_html_render(self, html_content: str) -> str: """ @@ -378,19 +520,35 @@ def _get_minimal_css(self) -> str: table { border-collapse: collapse; width: 100%; - margin: 1em 0; + margin: 1.5em 0; + font-size: 14px; + border: 2px solid #333; + background-color: #fff; } th, td { - border: 1px solid #ddd; - padding: 8px; - text-align: left; + border: 1px solid #333; + padding: 12px 16px; + text-align: center; + vertical-align: top; } th { - background-color: #f6f8fa; + background-color: #f0f0f0; + font-weight: bold; + color: #333; + border-bottom: 2px solid #333; } - tr:nth-child(even) { + tr:nth-child(even) td { background-color: #f9f9f9; } + td:first-child { + font-weight: 600; + background-color: #f6f8fa; + width: 30%; + } + table a { + color: #0366d6; + text-decoration: underline; + } blockquote { border-left: 4px solid #dfe2e5; margin: 0; diff --git a/mxtoai/scripts/templates/email_template.html b/mxtoai/scripts/templates/email_template.html index 9cd27dc..580ad7e 100644 --- a/mxtoai/scripts/templates/email_template.html +++ b/mxtoai/scripts/templates/email_template.html @@ -21,17 +21,17 @@ --container-width: {{ theme.spacing.container_width|default("800px") }}; --spacing-paragraph: {{ theme.spacing.paragraph|default("1em") }}; } - + * { box-sizing: border-box; margin: 0; padding: 0; } - + html { font-size: var(--font-size); } - + body { font-family: var(--font-family); line-height: var(--line-height); @@ -40,13 +40,13 @@ padding: 0; margin: 0; } - + .container { max-width: var(--container-width); margin: 0 auto; padding: 2rem 1rem; } - + /* Typography */ h1, h2, h3, h4, h5, h6 { color: var(--color-heading); @@ -55,78 +55,88 @@ font-weight: 600; line-height: 1.25; } - + h1 { font-size: 2em; } 
h2 { font-size: 1.5em; } h3 { font-size: 1.25em; } h4 { font-size: 1em; } h5 { font-size: 0.875em; } h6 { font-size: 0.85em; } - + p { margin-bottom: var(--spacing-paragraph); } - + a { color: var(--color-link); text-decoration: none; } - + a:hover { text-decoration: underline; } - + + /* List styles - Fixed for proper nesting */ ul, ol { - margin: 0 0 1rem 0.5rem; + margin: 0 0 1rem 0; padding-left: 2rem; list-style-position: outside; } - + /* Base list styles */ ul { list-style-type: disc; } ol { list-style-type: decimal; } - - ul ul, - ol ul { - list-style-type: circle; + + /* Nested unordered lists */ + ul ul { + list-style-type: circle; margin: 0.5rem 0; - padding-left: 3rem; } - - ul ul ul, - ol ul ul { - list-style-type: square; + + ul ul ul { + list-style-type: square; } - - ol ol, - ul ol { - list-style-type: lower-alpha; + + /* Nested ordered lists - this will make them show as a, b, c */ + ol ol { + list-style-type: lower-alpha; margin: 0.5rem 0; - padding-left: 4rem; } - - ol ol ol, - ul ol ol { - list-style-type: lower-roman; + + ol ol ol { + list-style-type: lower-roman; } - + + /* Mixed nesting */ + ul ol { + list-style-type: lower-alpha; + margin: 0.5rem 0; + } + + ol ul { + list-style-type: disc; + margin: 0.5rem 0; + } + + /* List items */ li { - margin: 0.5rem 0.5rem; + margin: 0.5rem 0; line-height: 1.5; display: list-item; - position: relative; } - + + /* Paragraph within list items */ li p { - margin: 0.5rem 0; - display: inline-block; + margin: 0; } - - li > ul, + + /* Nested lists within list items */ + li > ul, li > ol { + margin-top: 0.5rem; margin-bottom: 0.5rem; } - + code { font-family: SFMono-Regular, Consolas, "Liberation Mono", Menlo, monospace; background-color: var(--color-code-bg); @@ -134,7 +144,7 @@ border-radius: 3px; font-size: 0.9em; } - + pre { background-color: var(--color-code-bg); padding: 1rem; @@ -142,40 +152,67 @@ overflow-x: auto; margin: 1rem 0; } - + pre code { padding: 0; background-color: transparent; font-size: 
0.9em; } - + table { border-collapse: collapse; width: 100%; margin: 1.5rem 0; - font-size: 0.95em; + font-size: 14px; + border: 2px solid #333; + background-color: #fff; } - + th, td { - border: 1px solid var(--color-table-border); - padding: 0.75rem; - text-align: left; + border: 1px solid #333; + padding: 12px 16px; + text-align: center; vertical-align: top; } - + th { - background-color: var(--color-table-header); + background-color: #f0f0f0; + font-weight: bold; + color: #333; + border-bottom: 2px solid #333; + } + + tr:nth-child(even) td { + background-color: #f9f9f9; + } + + /* First column styling for attribute tables */ + td:first-child { font-weight: 600; + background-color: #f6f8fa; + width: 30%; } - - tr:nth-child(even) { - background-color: rgba(0, 0, 0, 0.02); + + /* Links in tables */ + table a { + color: var(--color-link); + text-decoration: underline; + } + + /* Email client compatibility */ + table[border="0"] { + border: 2px solid #333; } - - tr:hover { - background-color: rgba(0, 0, 0, 0.03); + + table td[style*="border"] { + border: 1px solid #333; + } + + .table-wrapper { + width: 100%; + overflow-x: auto; } - + /* Blockquotes */ blockquote { border-left: 4px solid var(--color-table-border); @@ -183,14 +220,14 @@ color: var(--color-blockquote); margin: 1rem 0; } - + /* Horizontal Rule */ hr { border: none; border-top: 1px solid var(--color-table-border); margin: 1.5rem 0; } - + /* Images */ img { max-width: 100%; @@ -198,20 +235,20 @@ display: block; margin: 1.5rem auto; } - + /* Citations and References */ .citation { font-size: 0.8em; vertical-align: super; color: var(--color-blockquote); } - + .references { margin-top: 2rem; padding-top: 1rem; border-top: 1px solid var(--color-table-border); } - + .reference { margin: 0.5rem 0; padding: 0.5rem; @@ -219,7 +256,7 @@ border-left: 3px solid var(--color-table-border); font-size: 0.9em; } - + /* Table of Contents */ .toc { background-color: rgba(0, 0, 0, 0.02); @@ -227,16 +264,16 @@ margin: 1rem 
0; border-radius: 5px; } - + .toc ul { list-style-type: none; padding-left: 1rem; } - + .toc li { margin: 0.3rem 0; } - + /* Signature */ .signature { color: var(--color-blockquote); @@ -245,13 +282,13 @@ padding-top: 1rem; margin-top: 2rem; } - + /* Additional utility classes */ .text-center { text-align: center; } .text-right { text-align: right; } .mt-0 { margin-top: 0; } .mb-0 { margin-bottom: 0; } - + /* Print styling */ @media print { body { @@ -259,69 +296,69 @@ color: #000; background: #fff; } - + .container { width: 100%; max-width: none; padding: 0; margin: 0; } - + a { text-decoration: underline; color: #000; } - + a[href]:after { content: " (" attr(href) ")"; font-size: 0.8em; } - + a[href^="#"]:after { content: ""; } - + pre, blockquote { border: 1px solid #999; page-break-inside: avoid; } - + thead { display: table-header-group; } - + tr, img { page-break-inside: avoid; } - + img { max-width: 100% !important; } - + p, h2, h3 { orphans: 3; widows: 3; } - + h2, h3 { page-break-after: avoid; } } - + /* Responsive adjustments */ @media (max-width: 600px) { .container { padding: 1rem 0.5rem; } - + table { font-size: 0.85em; } - + th, td { padding: 0.5rem; } - + pre { padding: 0.75rem; } @@ -333,4 +370,4 @@ {{ content|safe }}