Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion agents/crypto_trader_bot/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ Inspired by [How to build your own Add Liquidity Signal Telegram Bot for Solana
1. Clone the repository:
```bash
git clone https://github.com/swarmzero/examples.git
cd examples/agents/crypto-trader-bot
cd examples/agents/crypto_trader_bot
```

2. Install dependencies using Poetry:
Expand Down
6 changes: 6 additions & 0 deletions workflows/pdf_summarizer_tex_workflow/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
MODEL=gpt-4o-mini
ENVIRONMENT=dev
OPENAI_API_KEY=
OPENROUTER_MODEL=gryphe/mythomax-l2-13b
SWARMZERO_LOG_LEVEL=INFO
LANGTRACE_API_KEY=
3 changes: 3 additions & 0 deletions workflows/pdf_summarizer_tex_workflow/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.venv
swarmzero-data
.env
39 changes: 39 additions & 0 deletions workflows/pdf_summarizer_tex_workflow/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# PDF Summarizer to Tex Workflow

A sequential workflow built with the SwarmZero framework that produces concise summaries of large PDFs and writes them to a `.tex` file.

## Description

This workflow uses PyMuPDF to extract text from a PDF and is built on top of the SwarmZero framework, providing enhanced summarization capabilities with AI-powered processing.

## Prerequisites

- Python 3.11 or higher
- Poetry package manager
- OpenAI API Key
- Langtrace API Key

## Installation

1. Clone the repository:
```bash
git clone https://github.com/swarmzero/examples.git
cd examples/workflows/pdf_summarizer_tex_workflow
```

2. Install dependencies using Poetry:
```bash
poetry install --no-root
```

3. Set up environment variables:
Create a `.env` file in the workflow's root directory and add your API keys, using the `.env.example` file as a template.

## Usage

Run the workflow on a pdf like so:
```bash
python main.py path/to/pdf/text.pdf
```

## Learn more
Visit [SwarmZero](https://swarmzero.ai) to learn more about the SwarmZero framework.
144 changes: 144 additions & 0 deletions workflows/pdf_summarizer_tex_workflow/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
import asyncio
import os
import sys
import uuid
from typing import List

import fitz # PyMuPDF

from swarmzero.agent import Agent
from swarmzero.sdk_context import SDKContext
from swarmzero.workflow import Workflow, WorkflowStep, StepMode


def extract_text_from_pdf(pdf_path: str) -> str:
    """Return the text of every page of the PDF at *pdf_path*.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        The text of each page, in document order, joined by newline
        characters.

    Raises:
        Whatever ``fitz.open`` or ``page.get_text`` raises for a missing or
        unreadable file; errors are not caught here.
    """
    doc = fitz.open(pdf_path)
    try:
        return "\n".join(page.get_text() for page in doc)
    finally:
        # Release the document handle even if extraction fails part-way.
        doc.close()


def chunk_text(text: str, max_chars: int = 2000) -> List[str]:
    """Split *text* into chunks of at most *max_chars* characters.

    The text is split on newlines and consecutive lines are packed into the
    same chunk while they fit. A single line longer than *max_chars* is
    hard-split into *max_chars*-sized pieces so that no chunk exceeds the
    limit. Chunks are stripped of surrounding whitespace, and chunks that
    would be empty are never emitted.

    Args:
        text: The text to split.
        max_chars: Upper bound on the length of each returned chunk.

    Returns:
        The non-empty chunks in their original order; ``[]`` when *text*
        contains nothing but whitespace.

    Raises:
        ValueError: If *max_chars* is smaller than 1.
    """
    if max_chars < 1:
        raise ValueError("max_chars must be a positive integer")

    chunks: List[str] = []
    current = ""
    for para in text.split("\n"):
        if len(current) + len(para) < max_chars:
            current += para + "\n"
            continue

        # The line does not fit: flush what has been collected so far,
        # skipping whitespace-only buffers so no empty chunk is produced.
        flushed = current.strip()
        if flushed:
            chunks.append(flushed)

        if len(para) > max_chars:
            # A single oversized line would otherwise become one chunk that
            # breaks the size limit; cut it into fixed-size pieces instead.
            for start in range(0, len(para), max_chars):
                piece = para[start:start + max_chars].strip()
                if piece:
                    chunks.append(piece)
            current = ""
        else:
            current = para + "\n"

    tail = current.strip()
    if tail:
        chunks.append(tail)
    return chunks


def summarize_bullets(prompt: str, **kwargs):
    """Turn *prompt* into a dash-bulleted list, one bullet per "."-separated piece.

    The prompt is split on every period; pieces that are empty after
    trimming whitespace are dropped. Extra keyword arguments are accepted
    and ignored.

    Returns:
        A string starting with the header line ``[Bullet Point Summary]:``
        followed by one ``- <piece>`` line per remaining piece.
    """
    bullet_rows = []
    for fragment in prompt.split("."):
        trimmed = fragment.strip()
        if trimmed:
            bullet_rows.append("- " + trimmed)
    return "[Bullet Point Summary]:\n" + "\n".join(bullet_rows)

# LaTeX special characters mapped to escaped forms that typeset literally.
_LATEX_ESCAPES = str.maketrans({
    "\\": r"\textbackslash{}",
    "&": r"\&",
    "%": r"\%",
    "$": r"\$",
    "#": r"\#",
    "_": r"\_",
    "{": r"\{",
    "}": r"\}",
    "~": r"\textasciitilde{}",
    "^": r"\textasciicircum{}",
})


def _escape_latex(text: str) -> str:
    """Escape LaTeX special characters in *text* in a single pass."""
    return text.translate(_LATEX_ESCAPES)


def format_latex(prompt: str, **kwargs) -> str:
    """Wrap the dash-bulleted lines of *prompt* in a standalone LaTeX document.

    Only lines whose first non-blank character is ``-`` are used; every
    other line is ignored. Exactly one leading dash is removed as the bullet
    marker, so content such as ``- -5 degrees`` keeps its minus sign. The
    bullet text is treated as plain text: LaTeX special characters are
    escaped so that input like ``50% of A&B`` cannot break compilation.
    Bullets with no text are skipped. Extra keyword arguments are accepted
    and ignored.

    Args:
        prompt: Text containing one ``- item`` bullet per line.

    Returns:
        The source of an ``article`` document with a "Summary Notes" section
        and one ``\\item`` per bullet. The ``itemize`` environment is left
        out when there are no bullets, because an ``itemize`` without any
        ``\\item`` does not compile.
    """
    items = []
    for raw_line in prompt.split("\n"):
        line = raw_line.strip()
        if not line.startswith("-"):
            continue
        # Drop only the bullet marker itself, not every leading "-" or " ".
        content = line[1:].strip()
        if content:
            items.append(f"\\item {_escape_latex(content)}")

    parts = [
        "\\documentclass{article}",
        "\\usepackage[utf8]{inputenc}",
        "\\usepackage{enumitem}",
        "\\begin{document}",
        "\\section*{Summary Notes}",
    ]
    if items:
        parts.append("\\begin{itemize}[leftmargin=*, label=--]")
        parts.extend(items)
        parts.append("\\end{itemize}")
    parts.append("\\end{document}")
    return "\n".join(parts)



# Path to the SwarmZero config file, resolved relative to this script's own
# directory rather than the current working directory.
CONFIG_PATH = os.path.join(os.path.dirname(__file__), "swarmzero_config.toml")

# One SDK context built from that config at import time; the agents and the
# workflow defined in this module all receive this same instance.
sdk_context = SDKContext(CONFIG_PATH)

# Agent that turns a chunk of text into bullet points. It is created at import
# time, registers `summarize_bullets` as its function, and gets a fresh random
# UUID as its id on every run.
# NOTE(review): `chat_only_mode=True` presumably limits the agent to plain
# chat; confirm against the SwarmZero Agent documentation.
summarizer_agent = Agent(
name="BulletSummarizerAgent",
functions=[summarize_bullets],
instruction="Summarize input in bullet points.",
agent_id=str(uuid.uuid4()),
config_path=CONFIG_PATH,
sdk_context=sdk_context,
chat_only_mode=True
)


async def run_agent(prompt, **kwargs):
    """Send *prompt* to the bullet-summarizer agent and return its reply.

    Extra keyword arguments are accepted and ignored.
    """
    reply = await summarizer_agent.chat(prompt)
    return reply

# Agent that formats bullet points as a LaTeX document. It is created at
# import time, registers `format_latex` as its function, and gets a fresh
# random UUID as its id on every run.
# NOTE(review): `chat_only_mode=True` presumably limits the agent to plain
# chat; confirm against the SwarmZero Agent documentation.
latex_agent = Agent(
name="LatexFormatterAgent",
functions=[format_latex],
instruction="Format input bullet points into a LaTeX document.",
agent_id=str(uuid.uuid4()),
config_path=CONFIG_PATH,
sdk_context=sdk_context,
chat_only_mode=True
)

async def run_latex_agent(prompt, **kwargs):
    """Send *prompt* to the LaTeX-formatter agent and return its reply.

    Extra keyword arguments are accepted and ignored.
    """
    reply = await latex_agent.chat(prompt)
    return reply

# Two-step sequential workflow: summarize into bullets first, then format the
# bullets as LaTeX. Each step is backed by one of the async runner functions.
# NOTE(review): in the code visible here, `main()` calls `run_agent` directly
# and never runs this workflow object; confirm whether it is used elsewhere.
workflow = Workflow(
name="pdf_latex_summary",
instruction="Summarize PDF and convert to LaTeX notes.",
description="A workflow that summarizes and formats PDF text as LaTeX.",
steps=[
WorkflowStep(name="SummarizeBullets", runner=run_agent, mode=StepMode.SEQUENTIAL),
WorkflowStep(name="FormatLatex", runner=run_latex_agent, mode=StepMode.SEQUENTIAL),
],
sdk_context=sdk_context,
)

async def main():
    """Summarize a PDF into bullet points and write them to a ``.tex`` file.

    The PDF path is taken from the first command-line argument and defaults
    to ``sample.pdf``. The PDF text is split into chunks, each chunk is
    summarized by the summarizer agent, the bullet lines of all summaries
    are collected, and the result is wrapped in a LaTeX document written to
    ``<pdf base name>_summary.tex`` in the current working directory.

    Exits with an error message if the PDF path does not point to a file.
    """
    # TODO(review): include a sample pdf under a new 'data' folder so the
    # default path resolves out of the box.
    pdf_path = sys.argv[1] if len(sys.argv) > 1 else "sample.pdf"
    if not os.path.isfile(pdf_path):
        # Fail early with a readable message instead of a library traceback.
        sys.exit(f"PDF not found: {pdf_path}")

    text = extract_text_from_pdf(pdf_path)
    chunks = chunk_text(text)

    # Step 1: Summarize each chunk into bullets, one request at a time so
    # the summaries stay in document order.
    bullet_summaries = []
    for chunk in chunks:
        if not chunk:
            # Nothing to summarize; avoid an agent call with an empty prompt.
            continue
        summary = await run_agent(chunk)
        bullet_summaries.append(summary)

    # Step 2: Merge all bullet summaries into one bullet string
    merged_bullets = "\n".join(bullet_summaries)

    # Step 3: Keep just the bullet lines, trimmed so each starts with "-"
    bullet_lines = [
        line.strip()
        for line in merged_bullets.split("\n")
        if line.strip().startswith("-")
    ]

    # Step 4: Build the document with the shared formatter rather than a
    # second copy of the LaTeX template.
    final_latex = format_latex("\n".join(bullet_lines))

    # Step 5: Write the final LaTeX doc to file
    base_name = os.path.splitext(os.path.basename(pdf_path))[0]
    output_path = f"{base_name}_summary.tex"
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_latex)

    print(f"Summary written to: {output_path}")




if __name__ == "__main__":
    # Script entry point: run the async pipeline to completion.
    asyncio.run(main())
Loading