Skip to content

Commit 9bd06e3

Browse files
author
Thordata
committed
feat: v0.2.0 - Add new tools, update for SDK v0.4.0
- Add ThordataUniversalTool for advanced scraping
- Add ThordataProxyTool for geo-targeted requests
- Update existing tools for SDK v0.4.0 compatibility
- Update examples
- Improve documentation
1 parent 77faf82 commit 9bd06e3

File tree

10 files changed

+695
-340
lines changed

10 files changed

+695
-340
lines changed

.env.example

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,16 @@
1-
# Copy this file to .env and fill in your Thordata credentials.
2-
# Never commit your .env file to version control.
1+
# Thordata LangChain Tools Configuration
2+
# Copy this file to .env and fill in your credentials
33

4-
# Found at the bottom of the Thordata Dashboard
5-
THORDATA_SCRAPER_TOKEN=replace_with_your_scraper_token
4+
# Required: Get from Thordata Dashboard
5+
THORDATA_SCRAPER_TOKEN=your_scraper_token_here
66

7-
# Found in the "Public API" section
8-
THORDATA_PUBLIC_TOKEN=replace_with_your_public_token
9-
THORDATA_PUBLIC_KEY=replace_with_your_public_key
7+
# Optional: For advanced features
8+
THORDATA_PUBLIC_TOKEN=your_public_token_here
9+
THORDATA_PUBLIC_KEY=your_public_key_here
10+
11+
# Optional: For geo-targeted proxy requests
12+
THORDATA_USERNAME=your_proxy_username
13+
THORDATA_PASSWORD=your_proxy_password
14+
15+
# Optional: For agent examples
16+
OPENAI_API_KEY=your_openai_key_here

examples/simple_agent.py

Lines changed: 100 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -1,105 +1,127 @@
1-
from __future__ import annotations
1+
"""
2+
Simple LangChain Agent Example
23
3-
from typing import Any, Dict, List, Optional
4+
Demonstrates using Thordata tools with a LangChain agent to:
5+
1. Search for information
6+
2. Scrape a webpage
7+
3. Summarize the content
48
9+
Requirements:
10+
pip install langchain-openai openai
11+
12+
Usage:
13+
export OPENAI_API_KEY=your_key
14+
python examples/simple_agent.py
15+
"""
16+
17+
import os
18+
import sys
519
from dotenv import load_dotenv
20+
21+
load_dotenv()
22+
23+
# Check required environment variables
24+
if not os.getenv("THORDATA_SCRAPER_TOKEN"):
25+
print("❌ Error: Set THORDATA_SCRAPER_TOKEN in your .env file")
26+
sys.exit(1)
27+
28+
if not os.getenv("OPENAI_API_KEY"):
29+
print("❌ Error: Set OPENAI_API_KEY in your .env file")
30+
sys.exit(1)
31+
632
from langchain_openai import ChatOpenAI
733
from langchain_core.messages import HumanMessage
834

935
from thordata_langchain_tools import ThordataSerpTool, ThordataScrapeTool
1036

11-
load_dotenv() # Load OPENAI_API_KEY and THORDATA_* from a local .env file
12-
1337

14-
def find_thordata_homepage(max_results: int = 5) -> Optional[str]:
15-
"""
16-
Use ThordataSerpTool to find the official Thordata homepage URL.
17-
18-
Returns:
19-
The first organic result's link, or None if nothing is found.
20-
"""
38+
def search_for_homepage(query: str) -> str:
39+
"""Use SERP tool to find a homepage URL."""
40+
print(f"🔍 Searching for: '{query}'")
41+
2142
serp_tool = ThordataSerpTool()
43+
results = serp_tool.invoke({
44+
"query": query,
45+
"engine": "google",
46+
"num": 3,
47+
})
2248

23-
serp_result: Dict[str, Any] = serp_tool.invoke(
24-
{
25-
"query": "Thordata official homepage",
26-
"engine": "google",
27-
"num": max_results,
28-
}
29-
)
30-
31-
organic: List[Dict[str, Any]] = serp_result.get("organic") or []
49+
organic = results.get("organic", [])
3250
for item in organic:
33-
link = item.get("link")
34-
if link:
51+
link = item.get("link", "")
52+
if link and "thordata" in link.lower():
3553
return link
54+
55+
# Return first result if no thordata link found
56+
if organic:
57+
return organic[0].get("link", "")
58+
59+
raise RuntimeError("No results found")
60+
61+
62+
def scrape_page(url: str) -> str:
63+
"""Use Scrape tool to get page content."""
64+
print(f"📄 Scraping: {url}")
65+
66+
scrape_tool = ThordataScrapeTool()
67+
html = scrape_tool.invoke({
68+
"url": url,
69+
"js_render": False,
70+
"max_length": 5000,
71+
})
72+
73+
return html
74+
75+
76+
def summarize_with_llm(html: str, topic: str) -> str:
77+
"""Use LLM to summarize the content."""
78+
print("🤖 Summarizing with LLM...")
79+
80+
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
3681

37-
return None
82+
prompt = f"""You are a helpful assistant that summarizes web content.
3883
84+
Based on the following HTML content, provide a brief summary about {topic}.
85+
Focus on the key products, services, or features mentioned.
3986
40-
def scrape_url(url: str) -> str:
41-
"""
42-
Use ThordataScrapeTool to fetch the HTML of a given URL.
87+
Provide your summary in 3-5 bullet points.
4388
44-
The tool itself truncates overly long HTML to avoid huge LLM inputs.
45-
"""
46-
scrape_tool = ThordataScrapeTool()
47-
result = scrape_tool.invoke(
48-
{
49-
"url": url,
50-
"js_render": False,
51-
"output_format": "HTML",
52-
}
53-
)
54-
55-
if isinstance(result, str):
56-
return result
57-
return str(result)
58-
59-
60-
def summarize_html_with_llm(html: str) -> str:
61-
"""
62-
Call OpenAI (via LangChain ChatOpenAI) exactly once to summarize the HTML.
63-
We deliberately truncate the HTML to keep the token count very small,
64-
so that it fits comfortably under strict TPM limits.
65-
"""
66-
# Hard cap: we only keep the first 3000 characters of the HTML.
67-
# 3000 chars ~= 1000–1500 tokens, which is safe for your 60k TPM limit.
68-
MAX_HTML_FOR_LLM = 3000
69-
if len(html) > MAX_HTML_FOR_LLM:
70-
html = (
71-
html[:MAX_HTML_FOR_LLM]
72-
+ "\n\n[Truncated to first "
73-
f"{MAX_HTML_FOR_LLM} characters before sending to the LLM]"
74-
)
75-
76-
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
77-
78-
prompt = (
79-
"You are a technical writer.\n"
80-
"You will be given (a truncated portion of) the Thordata homepage HTML.\n"
81-
"Based on this excerpt, summarize Thordata's core products and services "
82-
"in at most 5 bullet points.\n"
83-
"Be concise and concrete, and avoid marketing fluff.\n\n"
84-
f"HTML content (truncated):\n{html}"
85-
)
89+
HTML Content:
90+
{html[:4000]}
91+
"""
8692

8793
response = llm.invoke([HumanMessage(content=prompt)])
8894
return response.content
8995

9096

91-
if __name__ == "__main__":
92-
homepage_url = find_thordata_homepage()
97+
def main():
98+
print("=" * 60)
99+
print("🚀 Thordata LangChain Agent Demo")
100+
print("=" * 60)
101+
print()
93102

94-
if not homepage_url:
95-
print("Could not determine Thordata homepage URL from SERP results.")
96-
raise SystemExit(1)
103+
try:
104+
# Step 1: Search for Thordata
105+
url = search_for_homepage("Thordata proxy network official site")
106+
print(f" Found URL: {url}\n")
97107

98-
print(f"Detected Thordata homepage URL: {homepage_url}")
108+
# Step 2: Scrape the page
109+
html = scrape_page(url)
110+
print(f" Scraped {len(html)} characters\n")
99111

100-
html = scrape_url(homepage_url)
112+
# Step 3: Summarize
113+
summary = summarize_with_llm(html, "Thordata's services")
114+
115+
print()
116+
print("=" * 60)
117+
print("📋 Summary:")
118+
print("=" * 60)
119+
print(summary)
101120

102-
summary = summarize_html_with_llm(html)
121+
except Exception as e:
122+
print(f"❌ Error: {e}")
123+
sys.exit(1)
103124

104-
print("\n=== Summary of Thordata Services ===")
105-
print(summary)
125+
126+
if __name__ == "__main__":
127+
main()

examples/simple_scrape.py

Lines changed: 42 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,49 @@
1-
from dotenv import load_dotenv
1+
"""
2+
Simple Web Scraping Example
23
3-
from thordata_langchain_tools import ThordataScrapeTool
4+
Demonstrates using ThordataScrapeTool to fetch webpage content.
5+
6+
Usage:
7+
python examples/simple_scrape.py
8+
"""
9+
10+
import os
11+
import sys
12+
from dotenv import load_dotenv
413

514
load_dotenv()
615

16+
if not os.getenv("THORDATA_SCRAPER_TOKEN"):
17+
print("❌ Error: Set THORDATA_SCRAPER_TOKEN in your .env file")
18+
sys.exit(1)
719

8-
if __name__ == "__main__":
20+
from thordata_langchain_tools import ThordataScrapeTool
21+
22+
23+
def main():
924
tool = ThordataScrapeTool()
1025

11-
result = tool.invoke(
12-
{
13-
"url": "https://www.thordata.com",
14-
"js_render": False,
15-
"output_format": "HTML",
16-
}
17-
)
18-
19-
# For HTML output this will be a long string.
20-
# Print only the first 1000 characters to keep the console readable.
21-
if isinstance(result, str):
22-
print(result[:1000])
23-
else:
24-
print(result)
26+
url = "https://example.com"
27+
print(f"🌐 Scraping: {url}")
28+
print()
29+
30+
# Scrape the page
31+
html = tool.invoke({
32+
"url": url,
33+
"js_render": False,
34+
"max_length": 2000,
35+
})
36+
37+
if html.startswith("Error"):
38+
print(f"❌ {html}")
39+
return
40+
41+
print("📄 HTML Content (first 1000 chars):")
42+
print("-" * 50)
43+
print(html[:1000])
44+
print("-" * 50)
45+
print(f"\n✅ Successfully scraped {len(html)} characters")
46+
47+
48+
if __name__ == "__main__":
49+
main()

examples/simple_serp.py

Lines changed: 54 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,61 @@
1+
"""
2+
Simple SERP Search Example
3+
4+
Demonstrates using ThordataSerpTool to search the web.
5+
6+
Usage:
7+
python examples/simple_serp.py
8+
"""
9+
10+
import os
11+
import sys
112
from dotenv import load_dotenv
213

3-
from thordata_langchain_tools import ThordataSerpTool
14+
# Load environment variables
15+
load_dotenv()
416

5-
load_dotenv() # Load THORDATA_* tokens and keys from a local .env file
17+
# Check for required token
18+
if not os.getenv("THORDATA_SCRAPER_TOKEN"):
19+
print("❌ Error: Set THORDATA_SCRAPER_TOKEN in your .env file")
20+
sys.exit(1)
621

22+
from thordata_langchain_tools import ThordataSerpTool
723

8-
if __name__ == "__main__":
24+
25+
def main():
26+
# Create the tool
927
tool = ThordataSerpTool()
1028

11-
result = tool.invoke(
12-
{
13-
"query": "Thordata proxy network",
14-
"engine": "google",
15-
"num": 3,
16-
}
17-
)
18-
print(result)
29+
print("🔍 Searching for: 'Python web scraping best practices'")
30+
print()
31+
32+
# Execute search
33+
results = tool.invoke({
34+
"query": "Python web scraping best practices",
35+
"engine": "google",
36+
"num": 5,
37+
})
38+
39+
# Check for errors
40+
if "error" in results:
41+
print(f"❌ Error: {results['error']}")
42+
return
43+
44+
# Display organic results
45+
organic = results.get("organic", [])
46+
print(f"📊 Found {len(organic)} organic results:\n")
47+
48+
for i, item in enumerate(organic, 1):
49+
title = item.get("title", "No title")
50+
link = item.get("link", "No link")
51+
snippet = item.get("snippet", "")[:100]
52+
53+
print(f"{i}. {title}")
54+
print(f" 🔗 {link}")
55+
if snippet:
56+
print(f" 📝 {snippet}...")
57+
print()
58+
59+
60+
if __name__ == "__main__":
61+
main()

0 commit comments

Comments
 (0)