1+ """
2+ Basic example of scraping pipeline using Code Generator with schema
3+ """
4+
5+ import os , json
6+ from typing import List
7+ from dotenv import load_dotenv
8+ from langchain_core .pydantic_v1 import BaseModel , Field
9+ from scrapegraphai .graphs import CodeGeneratorGraph
10+
# Pull settings from a local .env file (if present) into the process
# environment before anything below reads it (python-dotenv behaviour).
load_dotenv()
12+
13+ # ************************************************
14+ # Define the output schema for the graph
15+ # ************************************************
16+
# Schema for one scraped project entry.
# NOTE: documented with comments rather than a class docstring on purpose —
# a docstring on a pydantic model is emitted as the schema "description",
# which would change the schema handed to the graph.
class Project(BaseModel):
    # Human-readable project title.
    title: str = Field(
        description="The title of the project",
    )
    # Free-text summary of the project.
    description: str = Field(
        description="The description of the project",
    )
20+
# Top-level output schema passed to the graph: a wrapper around the list of
# Project entries.
class Projects(BaseModel):
    # All project entries, each shaped like ``Project``.
    projects: List[Project]
23+
24+ # ************************************************
25+ # Define the configuration for the graph
26+ # ************************************************
27+
# Configuration dictionary passed to CodeGeneratorGraph through ``config``.
# (The previous ``openai_key = os.getenv("OPENAI_APIKEY")`` assignment was
# dropped: it was never read, and this example authenticates against Ernie.)
graph_config = {
    # Model selection and credentials for the LLM backend.
    "llm": {
        "model": "ernie/ernie-bot-turbo",
        # Placeholders — replace with real Ernie credentials before running.
        "ernie_client_id": "<ernie_client_id>",
        "ernie_client_secret": "<ernie_client_secret>",
        "temperature": 0.1,
    },
    "verbose": True,
    "headless": False,
    "reduction": 2,
    # Iteration limits: one overall value plus one per named stage
    # (presumably caps on the graph's retry loops — confirm against the
    # ScrapeGraphAI documentation).
    "max_iterations": {
        "overall": 10,
        "syntax": 3,
        "execution": 3,
        "validation": 3,
        "semantic": 3,
    },
    "output_file_name": "extracted_data.py",
}
49+
# ************************************************
# Create the CodeGeneratorGraph instance and run it
# ************************************************
53+
# Build the code-generator graph for the target page, wiring in the prompt,
# the Projects output schema and the configuration dictionary defined above.
code_generator_graph = CodeGeneratorGraph(
    prompt="List me all the projects with their description",
    source="https://perinim.github.io/projects/",
    schema=Projects,
    config=graph_config,
)

# Execute the graph and print whatever run() returns.
result = code_generator_graph.run()
print(result)
0 commit comments