diff --git a/examples/classify/sec_filing_classify_extract.ipynb b/examples/classify/sec_filing_classify_extract.ipynb
new file mode 100644
index 00000000..da98dcef
--- /dev/null
+++ b/examples/classify/sec_filing_classify_extract.ipynb
@@ -0,0 +1,929 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "3b349365",
+   "metadata": {},
+   "source": [
+    "# Classifying and Extracting from SEC Filings\n",
+    "\n",
+    "<a href=\"https://colab.research.google.com/github/run-llama/llama_cloud_services/blob/main/examples/classify/sec_filing_classify_extract.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>\n",
+    "\n",
+    "This notebook demonstrates how to classify and extract information from SEC filings using LlamaCloud's classification (`ClassifyClient`) and extraction (`LlamaExtract`) modules. We'll walk through classifying each document as a 10-K, 10-Q, 8-K, or proxy statement (DEF 14A) and then extracting the information relevant to that filing type.\n",
+    "\n",
+    "**Note**: The classification module is currently in *beta*, so we are still ironing out some interface/implementation details. Please let us know your feedback!\n",
+    "\n",
+    "Status:\n",
+    "| Last Executed | Version | State      |\n",
+    "|---------------|---------|------------|\n",
+    "| Sep-09-2025   | 0.6.65  | Maintained |"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "22fb430e",
+   "metadata": {},
+   "source": [
+    "## Overview\n",
+    "\n",
+    "This notebook demonstrates a classify+extract workflow on SEC filings using LlamaCloud and LlamaIndex Workflows.\n",
+    "\n",
+    "- Classify each document as one of: 10-K, 10-Q, 8-K, Proxy (DEF 14A)\n",
+    "- Extract a type-specific schema depending on the classification\n",
+    "- Orchestrate via an event-driven LlamaIndex Workflow\n",
+    "\n",
+    "We also include public example documents, so anyone can run this end-to-end.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "12356c11",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Imports\n",
+    "import os\n",
+    "from typing import List, Optional\n",
+    "from datetime import date\n",
+    "from decimal import Decimal\n",
+    "from pydantic import BaseModel, Field\n",
+    "\n",
+    "from dotenv import load_dotenv\n",
+    "\n",
+    "load_dotenv()\n",
+    "\n",
+    "# LlamaIndex workflow imports\n",
+    "from llama_index.core.workflow import (\n",
+    "    Event,\n",
+    "    StartEvent,\n",
+    "    StopEvent,\n",
+    "    Context,\n",
+    "    Workflow,\n",
+    "    step,\n",
+    ")\n",
+    "from llama_index.core.prompts import ChatPromptTemplate\n",
+    "from llama_index.llms.openai import OpenAI\n",
+    "\n",
+    "# LlamaCloud classify/extract\n",
+    "from llama_cloud.client import AsyncLlamaCloud\n",
+    "from llama_cloud.types import ClassifierRule, ClassifyParsingConfiguration\n",
+    "from llama_cloud_services.beta.classifier.client import ClassifyClient\n",
+    "from llama_cloud_services import LlamaExtract, ExtractionAgent\n",
+    "from llama_cloud import ExtractConfig\n",
+    "from llama_cloud.core.api_error import ApiError"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "635e00b7",
+   "metadata": {},
+   "source": [
+    "## Sample documents\n",
+    "\n",
+    "We will download four public Microsoft SEC filings (10-K, 10-Q, 8-K, Proxy) into a local `data/` directory (relative to the notebook's working directory) and run the workflow over them.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c839b119",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'10-K': 'data/msft_10k.pdf',\n",
+       " '10-Q': 'data/msft_10q.pdf',\n",
+       " '8-K': 'data/msft_8k.pdf',\n",
+       " 'Proxy': 'data/msft_proxy.pdf'}"
+      ]
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Download Microsoft PDFs for all four types\n",
+    "import pathlib\n",
+    "import requests\n",
+    "\n",
+    "DATA_DIR = pathlib.Path(\"data\")\n",
+    "DATA_DIR.mkdir(parents=True, exist_ok=True)\n",
+    "\n",
+    "MSFT_DOCS = {\n",
+    "    \"10-K\": \"https://microsoft.gcs-web.com/static-files/1c864583-06f7-40cc-a94d-d11400c83cc8\",\n",
+    "    \"10-Q\": \"https://microsoft.gcs-web.com/static-files/f96f7d38-36ce-4a26-9e29-61701cdca7a7\",\n",
+    "    \"8-K\": \"https://microsoft.gcs-web.com/static-files/dc50633a-2880-4303-bebb-bdca89149f65\",\n",
+    "    \"Proxy\": \"https://microsoft.gcs-web.com/static-files/d5ec87b3-e29d-4d33-9d84-5ce1f194dcf1\",\n",
+    "}\n",
+    "\n",
+    "local_files = {}\n",
+    "for k, url in MSFT_DOCS.items():\n",
+    "    out_path = DATA_DIR / f\"msft_{k.replace('-', '').lower()}.pdf\"\n",
+    "    if not out_path.exists():\n",
+    "        # all four filings are fetched the same way, via requests\n",
+    "        r = requests.get(url, timeout=60)\n",
+    "        r.raise_for_status()\n",
+    "        with open(out_path, \"wb\") as f:\n",
+    "            f.write(r.content)\n",
+    "    local_files[k] = str(out_path)\n",
+    "\n",
+    "local_files"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c882ec38",
+   "metadata": {},
+   "source": [
+    "## Define type-specific extraction schemas\n",
+    "\n",
+    "We define concise Pydantic schemas for 10-K, 10-Q, 8-K, and Proxy (DEF 14A).\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3450e8d8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Form10K(BaseModel):\n",
+    "    company_name: str\n",
+    "    fiscal_year_end: date\n",
+    "    annual_revenue: Optional[Decimal] = None\n",
+    "    net_income: Optional[Decimal] = None\n",
+    "    total_assets: Optional[Decimal] = None\n",
+    "    employee_count: Optional[int] = None\n",
+    "    business_description: str\n",
+    "    primary_risk_factors: List[str]\n",
+    "    business_segments: List[str]\n",
+    "    geographic_markets: List[str]\n",
+    "\n",
+    "\n",
+    "class Form10Q(BaseModel):\n",
+    "    company_name: str\n",
+    "    quarter_end: date\n",
+    "    quarterly_revenue: Optional[Decimal] = None\n",
+    "    quarterly_net_income: Optional[Decimal] = None\n",
+    "    revenue_change_pct: Optional[float] = None\n",
+    "    material_changes: List[str]\n",
+    "    subsequent_events: List[str]\n",
+    "\n",
+    "\n",
+    "class Form8K(BaseModel):\n",
+    "    company_name: str\n",
+    "    filing_date: date\n",
+    "    event_date: date\n",
+    "    event_type: str\n",
+    "    material_event_description: str\n",
+    "    financial_impact: Optional[Decimal] = None\n",
+    "    involved_parties: List[str]\n",
+    "\n",
+    "\n",
+    "class ProxyStatement(BaseModel):\n",
+    "    company_name: str\n",
+    "    meeting_date: date\n",
+    "    ceo_name: str\n",
+    "    ceo_total_compensation: Optional[Decimal] = None\n",
+    "    board_members: List[str]\n",
+    "    executive_officers: List[str]\n",
+    "    shareholder_proposals: List[str]\n",
+    "    voting_matters: List[str]\n",
+    "    audit_firm: Optional[str] = None"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6a68087e",
+   "metadata": {},
+   "source": [
+    "## Classification rules\n",
+    "\n",
+    "We define four `ClassifierRule` entries describing each SEC form in natural language. The classifier returns the `type` string for the best-matching rule.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "73a78a11",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# NOTE: the types need to be in lowercase\n",
+    "SEC_CLASSIFICATION_RULES: list[ClassifierRule] = [\n",
+    "    ClassifierRule(\n",
+    "        type=\"10-k\",\n",
+    "        description=(\n",
+    "            \"Annual report on Form 10-K, includes business overview, risk factors, management's\"\n",
+    "            \" discussion and analysis, audited financial statements for the fiscal year.\"\n",
+    "        ),\n",
+    "    ),\n",
+    "    ClassifierRule(\n",
+    "        type=\"10-q\",\n",
+    "        description=(\n",
+    "            \"Quarterly report on Form 10-Q, includes unaudited quarterly financial statements,\"\n",
+    "            \" MD&A for the quarter, and updates on risk factors.\"\n",
+    "        ),\n",
+    "    ),\n",
+    "    ClassifierRule(\n",
+    "        type=\"8-k\",\n",
+    "        description=(\n",
+    "            \"Current report on Form 8-K, discloses material events such as acquisitions,\"\n",
+    "            \" executive changes, earnings releases, or other significant occurrences.\"\n",
+    "        ),\n",
+    "    ),\n",
+    "    ClassifierRule(\n",
+    "        type=\"proxy\",\n",
+    "        description=(\n",
+    "            \"DEF 14A proxy statement for shareholder meetings including proposals and voting,\"\n",
+    "            \" board of directors, executive compensation (CD&A), and auditor information.\"\n",
+    "        ),\n",
+    "    ),\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ae680962",
+   "metadata": {},
+   "source": [
+    "## Initialize clients\n",
+    "\n",
+    "We create clients for classification and extraction. Set `LLAMA_CLOUD_API_KEY` in your environment. Optionally set `LLAMA_CLOUD_BASE_URL`, `LLAMA_CLOUD_PROJECT_ID`, `LLAMA_CLOUD_ORGANIZATION_ID`.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "446dbb35",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "api_key = os.getenv(\"LLAMA_CLOUD_API_KEY\")\n",
+    "base_url = os.getenv(\"LLAMA_CLOUD_BASE_URL\")\n",
+    "project_id = os.getenv(\"LLAMA_CLOUD_PROJECT_ID\")\n",
+    "organization_id = os.getenv(\"LLAMA_CLOUD_ORGANIZATION_ID\")\n",
+    "\n",
+    "if not api_key:\n",
+    "    raise ValueError(\"LLAMA_CLOUD_API_KEY not set. Please set it in your environment.\")\n",
+    "\n",
+    "async_client = AsyncLlamaCloud(token=api_key, base_url=base_url)\n",
+    "classify_client = ClassifyClient(\n",
+    "    async_client, project_id=project_id, organization_id=organization_id\n",
+    ")\n",
+    "\n",
+    "extract_config = ExtractConfig(extraction_mode=\"BALANCED\")\n",
+    "llama_extract = LlamaExtract(project_id=project_id, organization_id=organization_id)\n",
+    "\n",
+    "# Model for LLM summarization in prompts if needed\n",
+    "llm = OpenAI(model=\"gpt-4o\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b21d50f0",
+   "metadata": {},
+   "source": [
+    "## Using Classify Module\n",
+    "\n",
+    "In this section we show you how to use the `ClassifyClient` module in a standalone manner (before using it in an e2e workflow).\n",
+    "\n",
+    "We run the classification module over the Microsoft 10-K file to verify that it is assigned the correct class.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f356085d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# set parsing configuration\n",
+    "parsing_config = ClassifyParsingConfiguration(max_pages=5)\n",
+    "\n",
+    "# classify file\n",
+    "results = await classify_client.aclassify_file_path(\n",
+    "    rules=SEC_CLASSIFICATION_RULES,\n",
+    "    file_input_path=\"data/msft_10k.pdf\",\n",
+    "    parsing_configuration=parsing_config,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2d02406d",
+   "metadata": {},
+   "source": [
+    "The result will not only contain the classification type, but also the reasoning."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "34249af0",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "10-k\n",
+      "The document is titled 'FORM 10-K' and is labeled as an 'ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934' for the fiscal year ended June 30, 2024. It contains all the hallmark sections of a 10-K, including Business Overview, Risk Factors, Management’s Discussion and Analysis (MD&A), and audited financial statements. The index and content structure match the requirements for a 10-K filing, and there is explicit reference to the form throughout the document. There is no ambiguity or evidence suggesting it is any other type of SEC filing. Therefore, this is a perfect match for the 10-k category.\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(results.items[0].result.type)\n",
+    "print(results.items[0].result.reasoning)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8408bb00",
+   "metadata": {},
+   "source": [
+    "## Workflow: Classify then Extract\n",
+    "\n",
+    "We build a `Workflow` with steps:\n",
+    "- `classify_file`: upload and classify the document\n",
+    "- `extract_by_type`: create an extraction agent for the type (replacing any existing agent with the same name), cache it on the workflow, and extract the corresponding schema\n",
+    "- `format_output`: return a unified result dict with `type`, `data`, and `file`\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "75ddf93a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class ClassifiedEvent(Event):\n",
+    "    file_path: str\n",
+    "    doc_type: str\n",
+    "\n",
+    "\n",
+    "class ExtractedEvent(Event):\n",
+    "    file_path: str\n",
+    "    doc_type: str\n",
+    "    data: dict\n",
+    "\n",
+    "\n",
+    "def _schema_for_type(doc_type: str):\n",
+    "    if doc_type == \"10-k\":\n",
+    "        return Form10K\n",
+    "    if doc_type == \"10-q\":\n",
+    "        return Form10Q\n",
+    "    if doc_type == \"8-k\":\n",
+    "        return Form8K\n",
+    "    if doc_type == \"proxy\":\n",
+    "        return ProxyStatement\n",
+    "    raise ValueError(f\"Unsupported doc_type: {doc_type}\")\n",
+    "\n",
+    "\n",
+    "def _agent_name_for_type(doc_type: str) -> str:\n",
+    "    return f\"sec-{doc_type.lower()}-extractor\"\n",
+    "\n",
+    "\n",
+    "class SECClassifyExtractWorkflow(Workflow):\n",
+    "    def __init__(self, **kwargs):\n",
+    "        super().__init__(**kwargs)\n",
+    "        self.agent_registry: dict[str, ExtractionAgent] = {}\n",
+    "\n",
+    "    @step\n",
+    "    async def classify_file(self, ctx: Context, ev: StartEvent) -> ClassifiedEvent:\n",
+    "        file_path = ev.file_path\n",
+    "        parsing_config = ClassifyParsingConfiguration(max_pages=5)\n",
+    "        results = await classify_client.aclassify_file_path(\n",
+    "            rules=SEC_CLASSIFICATION_RULES,\n",
+    "            file_input_path=file_path,\n",
+    "            parsing_configuration=parsing_config,\n",
+    "        )\n",
+    "        item = results.items[0]\n",
+    "        doc_type = item.result.type\n",
+    "        return ClassifiedEvent(file_path=file_path, doc_type=doc_type)\n",
+    "\n",
+    "    @step\n",
+    "    async def extract_by_type(\n",
+    "        self, ctx: Context, ev: ClassifiedEvent\n",
+    "    ) -> ExtractedEvent:\n",
+    "        schema = _schema_for_type(ev.doc_type)\n",
+    "        agent_name = _agent_name_for_type(ev.doc_type)\n",
+    "\n",
+    "        # Lazily create agent if not present\n",
+    "        if ev.doc_type not in self.agent_registry:\n",
+    "            try:\n",
+    "                existing = llama_extract.get_agent(name=agent_name)\n",
+    "                if existing:\n",
+    "                    llama_extract.delete_agent(existing.id)\n",
+    "            except ApiError as e:\n",
+    "                if e.status_code != 404:\n",
+    "                    raise\n",
+    "            agent = llama_extract.create_agent(\n",
+    "                name=agent_name, data_schema=schema, config=extract_config\n",
+    "            )\n",
+    "            self.agent_registry[ev.doc_type] = agent\n",
+    "\n",
+    "        extraction = await self.agent_registry[ev.doc_type].aextract(ev.file_path)\n",
+    "        data = (\n",
+    "            extraction.data\n",
+    "            if isinstance(extraction.data, dict)\n",
+    "            else extraction.model_dump()\n",
+    "        )\n",
+    "        return ExtractedEvent(file_path=ev.file_path, doc_type=ev.doc_type, data=data)\n",
+    "\n",
+    "    @step\n",
+    "    async def format_output(self, ctx: Context, ev: ExtractedEvent) -> StopEvent:\n",
+    "        return StopEvent(\n",
+    "            result={\"type\": ev.doc_type, \"data\": ev.data, \"file\": ev.file_path}\n",
+    "        )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a98463ec",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Running step classify_file\n",
+      "Running step classify_file\n",
+      "Running step classify_file\n",
+      "Running step classify_file\n",
+      "Step classify_file produced event ClassifiedEvent\n",
+      "Running step extract_by_type\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Uploading files: 100%|████████████████████████████████████████████| 1/1 [00:01<00:00,  1.23s/it]\n",
+      "Creating extraction jobs: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.21it/s]\n",
+      "Extracting files: 100%|███████████████████████████████████████████| 1/1 [00:12<00:00, 12.11s/it]\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Step extract_by_type produced event ExtractedEvent\n",
+      "Running step format_output\n",
+      "Step format_output produced event StopEvent\n",
+      "Step classify_file produced event ClassifiedEvent\n",
+      "Running step extract_by_type\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Uploading files: 100%|████████████████████████████████████████████| 1/1 [00:00<00:00,  2.13it/s]\n",
+      "Creating extraction jobs: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.13it/s]\n",
+      "Extracting files: 100%|███████████████████████████████████████████| 1/1 [00:17<00:00, 17.91s/it]\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Step extract_by_type produced event ExtractedEvent\n",
+      "Running step format_output\n",
+      "Step format_output produced event StopEvent\n",
+      "Step classify_file produced event ClassifiedEvent\n",
+      "Running step extract_by_type\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Uploading files:   0%|                                                    | 0/1 [00:00<?, ?it/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Step classify_file produced event ClassifiedEvent\n",
+      "Running step extract_by_type\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Uploading files:   0%|                                                    | 0/1 [00:00<?, ?it/s]\u001b[A\n",
+      "Uploading files: 100%|████████████████████████████████████████████| 1/1 [00:01<00:00,  1.26s/it]\u001b[A\n",
+      "\n",
+      "Uploading files: 100%|████████████████████████████████████████████| 1/1 [00:03<00:00,  3.63s/it]\u001b[A\n",
+      "Creating extraction jobs:   0%|                                           | 0/1 [00:00<?, ?it/s]\n",
+      "Creating extraction jobs: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.40it/s]\u001b[A\n",
+      "\n",
+      "Creating extraction jobs: 100%|███████████████████████████████████| 1/1 [00:00<00:00,  1.56it/s]\u001b[A\n",
+      "Extracting files: 100%|██████████████████████████████████████████| 1/1 [03:02<00:00, 182.34s/it]\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Step extract_by_type produced event ExtractedEvent\n",
+      "Running step format_output\n",
+      "Step format_output produced event StopEvent\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Extracting files: 100%|██████████████████████████████████████████| 1/1 [03:18<00:00, 198.50s/it]\u001b[A"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Step extract_by_type produced event ExtractedEvent\n",
+      "Running step format_output\n",
+      "Step format_output produced event StopEvent\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "{'10-K': {'type': '10-k', 'file': 'data/msft_10k.pdf'},\n",
+       " '10-Q': {'type': '10-q', 'file': 'data/msft_10q.pdf'},\n",
+       " '8-K': {'type': '8-k', 'file': 'data/msft_8k.pdf'},\n",
+       " 'Proxy': {'type': 'proxy', 'file': 'data/msft_proxy.pdf'}}"
+      ]
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import asyncio\n",
+    "import nest_asyncio\n",
+    "\n",
+    "nest_asyncio.apply()\n",
+    "\n",
+    "# Optional: limit concurrency (helps with rate limits)\n",
+    "sem = asyncio.Semaphore(4)\n",
+    "\n",
+    "\n",
+    "async def run_one(doc_type: str, path: str):\n",
+    "    async with sem:  # remove this line and the 'async with' to run fully unbounded\n",
+    "        wf = SECClassifyExtractWorkflow(verbose=True, timeout=None)\n",
+    "        result = await wf.run(file_path=path)\n",
+    "        return doc_type, result\n",
+    "\n",
+    "\n",
+    "pairs = await asyncio.gather(*(run_one(t, p) for t, p in local_files.items()))\n",
+    "results_by_type = {t: res for t, res in pairs}\n",
+    "\n",
+    "# Quick view\n",
+    "{t: {\"type\": v[\"type\"], \"file\": v[\"file\"]} for t, v in results_by_type.items()}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4bd8aee0",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "==== 10-k ====\n",
+      "{'annual_revenue': 245157565500.0,\n",
+      " 'company_name': 'Microsoft Corporation',\n",
+      " 'fiscal_year_end': 'June 30, 2024',\n",
+      " 'net_income': 88136000000.0,\n",
+      " 'primary_risk_factors': ['We face intense competition across all markets for '\n",
+      "                          'our products and services, which may adversely '\n",
+      "                          'affect our results of operations. Competition in '\n",
+      "                          'the technology sector from diversified global '\n",
+      "                          'companies and small, specialized firms. Barriers to '\n",
+      "                          'entry in many of our businesses are low and many '\n",
+      "                          'areas in which we compete evolve rapidly with '\n",
+      "                          'changing and disruptive technologies, shifting user '\n",
+      "                          'needs, and frequent introductions of new products '\n",
+      "                          'and services. Competition from firms that provide '\n",
+      "                          'competing platforms and platform-based ecosystems. '\n",
+      "                          'Competition from vertically-integrated models where '\n",
+      "                          'firms control software, hardware, and related '\n",
+      "                          'services, increasing their revenue and potential '\n",
+      "                          'security/performance benefits. Competing platforms '\n",
+      "                          'offer content and application marketplaces with '\n",
+      "                          'scale and significant installed bases, challenging '\n",
+      "                          'our ability to attract developers. Cloud-based '\n",
+      "                          'services competition for consumers and business '\n",
+      "                          'customers; pricing and delivery models are '\n",
+      "                          'evolving. Highly competitive and rapidly evolving '\n",
+      "                          'AI technology and services market. Competition from '\n",
+      "                          'companies distributing open source software at '\n",
+      "                          'little or no cost to end users.',\n",
+      "                          'Cyberattacks and security vulnerabilities could '\n",
+      "                          'lead to reduced revenue, increased costs, liability '\n",
+      "                          'claims, or harm to reputation or competitive '\n",
+      "                          'position. Nation-state and state-sponsored cyber '\n",
+      "                          'attacks. Security threats to IT, including evolving '\n",
+      "                          'methods by hackers and organizations to gain '\n",
+      "                          'unauthorized access. Security vulnerabilities in '\n",
+      "                          'products and services, including data corruption, '\n",
+      "                          'reduced performance, or misuse of personal data '\n",
+      "                          'leading to reputational harm, legal exposure, or '\n",
+      "                          'liability. Abuse of platforms such as '\n",
+      "                          'impersonation, misinformation, objectionable or '\n",
+      "                          'illegal content, and compliance with content '\n",
+      "                          'moderation regulations. Defective, insecure, or '\n",
+      "                          'ineffective products, including those involving AI '\n",
+      "                          'or Internet of Things devices, leading to legal '\n",
+      "                          'claims, reputational damage, or regulatory action.',\n",
+      "                          'AI-related risks including flawed algorithms, '\n",
+      "                          'biased datasets, harmful outputs, copyright or '\n",
+      "                          'legal claims, regulatory changes, and '\n",
+      "                          'ethical/societal impacts. Operational risks '\n",
+      "                          'including outages, data losses, supply chain '\n",
+      "                          'disruptions, datacenter and server component '\n",
+      "                          'shortages, hardware and software quality issues. '\n",
+      "                          'Government enforcement under competition laws, '\n",
+      "                          'antitrust actions, new market regulations, and '\n",
+      "                          'resulting fines, restrictions, or inability to '\n",
+      "                          'monetize or operate products. Anti-corruption, '\n",
+      "                          'trade, and export control laws and related '\n",
+      "                          'compliance risks, potential for fines, penalties, '\n",
+      "                          'operational bans, or reputational damage. Evolving '\n",
+      "                          'laws and regulations relating to the handling of '\n",
+      "                          'personal data (e.g. GDPR, Digital Markets Act, data '\n",
+      "                          'localization), including cross-border transfer '\n",
+      "                          'restrictions, legal challenges, and costs of '\n",
+      "                          'compliance. Expanding legal, regulatory, and '\n",
+      "                          'reporting requirements in areas such as user data '\n",
+      "                          'privacy, digital accessibility, advertising, AI, '\n",
+      "                          'and cybersecurity.',\n",
+      "                          'Material legal claims, lawsuits, and uncertain '\n",
+      "                          'litigation outcomes related to product releases, '\n",
+      "                          'AI, government contracts, employment, and IP. '\n",
+      "                          'Adverse tax determinations, audits (including IRS '\n",
+      "                          'Notices of Proposed Adjustment), changing tax laws '\n",
+      "                          'or international agreements affecting effective tax '\n",
+      "                          'rate and liabilities. Sustainability and '\n",
+      "                          'ESG-related legal requirements and fulfillment of '\n",
+      "                          'public sustainability commitments, including risks '\n",
+      "                          'of legal action, regulatory penalties, or '\n",
+      "                          'reputational damage. Intellectual property risks '\n",
+      "                          'including inability to protect IP, source code '\n",
+      "                          'leaks, infringement claims (including regarding AI '\n",
+      "                          'training), and exposure to royalty, damages, or '\n",
+      "                          'injunctions. Reputation or brand harm from customer '\n",
+      "                          'or stakeholder backlash, product/service issues, '\n",
+      "                          'privacy/data breaches, or failures in responsible '\n",
+      "                          'AI. Adverse economic conditions, inflation, '\n",
+      "                          'recession, market instability, customer insolvency, '\n",
+      "                          'and investment impairments.',\n",
+      "                          'Catastrophic events, geopolitical risks, pandemics, '\n",
+      "                          'climate change, and natural disasters disrupting '\n",
+      "                          'operations or affecting financial condition. Risks '\n",
+      "                          'related to attracting and retaining talented '\n",
+      "                          'employees, workforce diversity, unionization, and '\n",
+      "                          'compliance with changing employment laws. '\n",
+      "                          'Cybersecurity threats. Challenges in evolving '\n",
+      "                          'technology and business models. Rapid changes in '\n",
+      "                          'customer device and form factor preferences. Global '\n",
+      "                          'macroeconomic and geopolitical factors. '\n",
+      "                          'Availability of land, energy, networking supplies, '\n",
+      "                          'and servers for datacenter expansion. Dependence on '\n",
+      "                          'qualified suppliers for certain device components. '\n",
+      "                          'Ability to attract and retain qualified employees. '\n",
+      "                          'Fluctuations in foreign exchange rates.',\n",
+      "                          'Economic risk from foreign exchange rates, interest '\n",
+      "                          'rates, credit risk, and equity prices. Exposure to '\n",
+      "                          'foreign currency risk including Euro, Japanese yen, '\n",
+      "                          'British pound, Canadian dollar, and Australian '\n",
+      "                          'dollar. Certain forecasted transactions, assets, '\n",
+      "                          'and liabilities are exposed to foreign currency '\n",
+      "                          'risk. Securities held in our fixed-income portfolio '\n",
+      "                          'are subject to different interest rate risks based '\n",
+      "                          'on their maturities. Securities held in our equity '\n",
+      "                          'investments portfolio are subject to market price '\n",
+      "                          'risk. Our fixed-income portfolio is diversified and '\n",
+      "                          'consists primarily of investment-grade securities. '\n",
+      "                          'We use credit default swap contracts to manage '\n",
+      "                          'credit exposures relative to broad-based indices.',\n",
+      "                          'Pending and ongoing litigation, including U.S. cell '\n",
+      "                          'phone litigation related to alleged adverse health '\n",
+      "                          'effects from radio emissions. Risk of adjustments '\n",
+      "                          'and high-value tax contingencies from U.S. IRS '\n",
+      "                          'audits, particularly concerning intercompany '\n",
+      "                          'transfer pricing and large proposed tax '\n",
+      "                          'adjustments. Potential goodwill and intangible '\n",
+      "                          'asset impairments. Integration risks and fair value '\n",
+      "                          'allocation uncertainties associated with large '\n",
+      "                          'acquisitions such as Activision Blizzard and '\n",
+      "                          'Nuance. Uncertain or changing tax regulations, '\n",
+      "                          'especially related to foreign earnings and deferred '\n",
+      "                          'tax asset realization.']}\n",
+      "\n",
+      "==== 10-q ====\n",
+      "{'company_name': 'Microsoft Corporation',\n",
+      " 'material_changes': ['In August 2024, Microsoft announced changes to the '\n",
+      "                      'composition of its segments to align with current '\n",
+      "                      'business management, notably consolidating commercial '\n",
+      "                      'components of Microsoft 365 into the Productivity and '\n",
+      "                      'Business Processes segment. Prior period segment '\n",
+      "                      'information was recast for fiscal year 2025, impacting '\n",
+      "                      'Note 8 – Goodwill, Note 12 – Unearned Revenue, and Note '\n",
+      "                      '17 – Segment Information and Geographic Data.',\n",
+      "                      'In March 2024, Microsoft obtained a non-exclusive '\n",
+      "                      \"license to Inflection AI, Inc.'s intellectual property \"\n",
+      "                      'under an agreement, with Reid Hoffman, a board member, '\n",
+      "                      'being a co-founder and director of Inflection.',\n",
+      "                      'On October 13, 2023, Microsoft completed its '\n",
+      "                      'acquisition of Activision Blizzard, Inc. for $75.4 '\n",
+      "                      'billion, mainly cash. Changes to business segments in '\n",
+      "                      'fiscal year 2025 resulted in reallocation of goodwill '\n",
+      "                      'using a relative fair value approach. Unrecognized tax '\n",
+      "                      'benefits and other income tax liabilities totaled $25.9 '\n",
+      "                      'billion as of September 30, 2024, with IRS seeking an '\n",
+      "                      'additional $28.9 billion plus penalties and interest '\n",
+      "                      'for 2004-2013 related to intercompany transfer pricing. '\n",
+      "                      'On September 16, 2024, Microsoft approved a new $60 '\n",
+      "                      'billion share repurchase program following the previous '\n",
+      "                      'program. In October 2024, the Irish Data Protection '\n",
+      "                      'Commission issued a final decision to LinkedIn '\n",
+      "                      'regarding alleged GDPR violations.',\n",
+      "                      'Microsoft reported financial performance highlights '\n",
+      "                      'including a 22% increase in Cloud revenue to $38.9 '\n",
+      "                      'billion, overall revenue up 16%, driven by growth in '\n",
+      "                      'all segments: Intelligent Cloud, Productivity and '\n",
+      "                      'Business Processes, and More Personal Computing. Gaming '\n",
+      "                      'revenue increased 43%, with Xbox content and services '\n",
+      "                      'revenue growing 61% largely due to the Activision '\n",
+      "                      'Blizzard acquisition, while Xbox hardware revenue '\n",
+      "                      'decreased 29%. Operating income increased 14%. Cost of '\n",
+      "                      'revenue increased 23%, gross margin increased 13%, but '\n",
+      "                      'gross margin percentage decreased due to Intelligent '\n",
+      "                      'Cloud and Cloud AI infrastructure scaling. Expenses '\n",
+      "                      'rose driven by Gaming and cloud engineering '\n",
+      "                      'investments. Effective tax rate increased to 19% from '\n",
+      "                      \"18%, with the OECD's global minimum tax applicable from \"\n",
+      "                      'fiscal year 2025. IRS Notices seek $28.9 billion plus '\n",
+      "                      'penalties and interest.',\n",
+      "                      'Cash from operations increased $3.6 billion to $34.2 '\n",
+      "                      'billion for the quarter ended September 30, 2024. Cash '\n",
+      "                      'used in financing increased $31.3 billion due to higher '\n",
+      "                      'debt repayments. Cash used in investing increased $15.7 '\n",
+      "                      'billion primarily due to lower investment cash flow and '\n",
+      "                      'higher property and equipment additions. During the '\n",
+      "                      'three months ended September 30, 2024, Microsoft '\n",
+      "                      'repurchased 7 million shares for $2.8 billion and '\n",
+      "                      'declared dividends totaling $6.2 billion. A $7.5 '\n",
+      "                      'billion balance remains on the $60 billion share '\n",
+      "                      'repurchase program. Security incidents occurred '\n",
+      "                      'involving a password spray attack in late 2023 '\n",
+      "                      'affecting some Microsoft systems, potentially harming '\n",
+      "                      'reputation and operations.',\n",
+      "                      'Microsoft faces risks including security, privacy, and '\n",
+      "                      'operational execution in products and AI integration; '\n",
+      "                      'potential flaws in AI development causing legal or '\n",
+      "                      'reputational harm; operational infrastructure risks '\n",
+      "                      'like outages and supply chain disruptions; legal, '\n",
+      "                      'regulatory, and litigation challenges including '\n",
+      "                      'significant IRS tax disputes; compliance with data '\n",
+      "                      'privacy regulations such as GDPR; sustainability '\n",
+      "                      'regulatory and commitment risks; intellectual property '\n",
+      "                      'protection issues; and risks from catastrophic events, '\n",
+      "                      'geopolitical conflicts, economic downturns, and '\n",
+      "                      'workforce challenges.'],\n",
+      " 'quarter_end': 'September 30, 2024',\n",
+      " 'quarterly_revenue': 65585.0,\n",
+      " 'revenue_change_pct': 16.04}\n",
+      "\n",
+      "==== 8-k ====\n",
+      "{'company_name': 'Microsoft Corporation',\n",
+      " 'event_date': 'August 21, 2024',\n",
+      " 'event_type': 'Regulation FD Disclosure; Segment and Metric Changes',\n",
+      " 'material_event_description': 'Microsoft Corporation announced updates to its '\n",
+      "                               'reporting segment structure and key '\n",
+      "                               'performance metrics for Fiscal Year 2025. '\n",
+      "                               'Changes include the creation of a new '\n",
+      "                               \"'Microsoft 365 Commercial products and cloud \"\n",
+      "                               \"services' segment within Productivity and \"\n",
+      "                               'Business Processes, realignment of revenues '\n",
+      "                               'for EMS, Power BI, Windows Commercial, Copilot '\n",
+      "                               'Pro, and Nuance Enterprise, and revised '\n",
+      "                               \"metrics such as replacing 'Office Commercial \"\n",
+      "                               \"products and cloud services revenue growth' \"\n",
+      "                               \"with 'Microsoft 365 Commercial cloud revenue \"\n",
+      "                               \"growth.' Microsoft updated its Fiscal Year \"\n",
+      "                               '2025 Q1 financial outlook to reflect these '\n",
+      "                               'structural changes.'}\n",
+      "\n",
+      "==== proxy ====\n",
+      "{'ceo_name': 'Satya Nadella',\n",
+      " 'ceo_total_compensation': None,\n",
+      " 'company_name': 'Microsoft Corporation',\n",
+      " 'meeting_date': '12/10/24',\n",
+      " 'voting_matters': ['Election of Directors',\n",
+      "                    'Advisory Vote to Approve Named Executive Officer '\n",
+      "                    'Compensation (\"say-on-pay vote\")',\n",
+      "                    'Ratification of the Selection of Deloitte & Touche LLP as '\n",
+      "                    'our Independent Auditor for Fiscal Year 2025',\n",
+      "                    'Report on Risks of Weapons Development',\n",
+      "                    'Assessment of Investing in Bitcoin',\n",
+      "                    'Report on Data Operations in Human Rights Hotspots',\n",
+      "                    'Report on Artificial Intelligence and Machine Learning '\n",
+      "                    'Tools for Oil and Gas Development and Production',\n",
+      "                    'Report on AI Misinformation and Disinformation',\n",
+      "                    'Report on AI Data Sourcing Accountability']}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Pretty print a subset of fields for each type\n",
+    "import json\n",
+    "from pprint import pprint\n",
+    "\n",
+    "\n",
+    "def summarize(doc_type: str, data: dict):\n",
+    "    print(f\"\\n==== {doc_type} ====\")\n",
+    "    if doc_type == \"10-k\":\n",
+    "        keys = [\n",
+    "            \"company_name\",\n",
+    "            \"fiscal_year_end\",\n",
+    "            \"annual_revenue\",\n",
+    "            \"net_income\",\n",
+    "            \"primary_risk_factors\",\n",
+    "        ]\n",
+    "    elif doc_type == \"10-q\":\n",
+    "        keys = [\n",
+    "            \"company_name\",\n",
+    "            \"quarter_end\",\n",
+    "            \"quarterly_revenue\",\n",
+    "            \"revenue_change_pct\",\n",
+    "            \"material_changes\",\n",
+    "        ]\n",
+    "    elif doc_type == \"8-k\":\n",
+    "        keys = [\n",
+    "            \"company_name\",\n",
+    "            \"event_date\",\n",
+    "            \"event_type\",\n",
+    "            \"material_event_description\",\n",
+    "        ]\n",
+    "    else:  # Proxy\n",
+    "        keys = [\n",
+    "            \"company_name\",\n",
+    "            \"meeting_date\",\n",
+    "            \"ceo_name\",\n",
+    "            \"ceo_total_compensation\",\n",
+    "            \"voting_matters\",\n",
+    "        ]\n",
+    "    subset = {k: data.get(k) for k in keys}\n",
+    "    pprint(subset)\n",
+    "\n",
+    "\n",
+    "# Only the per-type result dicts are needed; the mapping keys are unused.\n",
+    "for result in results_by_type.values():\n",
+    "    summarize(result[\"type\"], result[\"data\"])"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "llama_parse",
+   "language": "python",
+   "name": "llama_parse"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}