"""
Simple LangChain Agent Example

Demonstrates using Thordata tools with a LangChain agent to:
1. Search for information
2. Scrape a webpage
3. Summarize the content

Requirements:
    pip install langchain-openai openai

Usage:
    export OPENAI_API_KEY=your_key
    python examples/simple_agent.py
"""

import os
import sys

from dotenv import load_dotenv

# Pull THORDATA_* and OPENAI_API_KEY from a local .env file before the
# credential checks below run.
load_dotenv()

# Fail fast with an actionable message when required credentials are missing,
# instead of surfacing an opaque error deep inside a tool call later.
if not os.getenv("THORDATA_SCRAPER_TOKEN"):
    print("❌ Error: Set THORDATA_SCRAPER_TOKEN in your .env file")
    sys.exit(1)

if not os.getenv("OPENAI_API_KEY"):
    print("❌ Error: Set OPENAI_API_KEY in your .env file")
    sys.exit(1)

from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage

from thordata_langchain_tools import ThordataSerpTool, ThordataScrapeTool
def search_for_homepage(query: str) -> str:
    """Use the Thordata SERP tool to find a homepage URL for *query*.

    Prefers the first organic result whose link mentions "thordata";
    otherwise falls back to the first organic result with a non-empty link.

    Args:
        query: Search phrase passed to the SERP engine.

    Returns:
        The chosen result URL.

    Raises:
        RuntimeError: If the search yields no usable organic results.
    """
    print(f"🔍 Searching for: '{query}'")

    serp_tool = ThordataSerpTool()
    results = serp_tool.invoke({
        "query": query,
        "engine": "google",
        "num": 3,
    })

    # The API may return "organic": None rather than omitting the key, so
    # `or []` guards both the missing-key and the explicit-null cases
    # (a plain .get("organic", []) would crash on None).
    organic = results.get("organic") or []

    # First pass: prefer a link that actually points at Thordata.
    for item in organic:
        link = item.get("link", "")
        if link and "thordata" in link.lower():
            return link

    # Fallback: first result with a non-empty link, so we never hand an
    # empty string to the scraper.
    for item in organic:
        link = item.get("link", "")
        if link:
            return link

    raise RuntimeError("No results found")
60+
61+
62+ def scrape_page (url : str ) -> str :
63+ """Use Scrape tool to get page content."""
64+ print (f"📄 Scraping: { url } " )
65+
66+ scrape_tool = ThordataScrapeTool ()
67+ html = scrape_tool .invoke ({
68+ "url" : url ,
69+ "js_render" : False ,
70+ "max_length" : 5000 ,
71+ })
72+
73+ return html
74+
def summarize_with_llm(html: str, topic: str) -> str:
    """Ask the LLM for a short bullet-point summary of *html* about *topic*.

    Args:
        html: Raw page HTML; only the first 4000 characters are sent to
            keep the prompt well under token limits.
        topic: Subject the summary should focus on.

    Returns:
        The model's summary text.
    """
    print("🤖 Summarizing with LLM...")

    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

    prompt = f"""You are a helpful assistant that summarizes web content.

Based on the following HTML content, provide a brief summary about {topic}.
Focus on the key products, services, or features mentioned.

Provide your summary in 3-5 bullet points.

HTML Content:
{html[:4000]}
"""

    response = llm.invoke([HumanMessage(content=prompt)])
    return response.content
def main():
    """Run the three-step demo: search, scrape, then summarize."""
    banner = "=" * 60
    print(banner)
    print("🚀 Thordata LangChain Agent Demo")
    print(banner)
    print()

    try:
        # Step 1: Search for Thordata
        url = search_for_homepage("Thordata proxy network official site")
        print(f"   Found URL: {url}\n")

        # Step 2: Scrape the page
        html = scrape_page(url)
        print(f"   Scraped {len(html)} characters\n")

        # Step 3: Summarize
        summary = summarize_with_llm(html, "Thordata's services")

        print()
        print(banner)
        print("📋 Summary:")
        print(banner)
        print(summary)

    except Exception as e:
        # Demo-level boundary: report the failure and exit non-zero rather
        # than dumping a traceback at the user.
        print(f"❌ Error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
0 commit comments