Skip to content

Commit 6fd9f14

Browse files
Merge remote-tracking branch 'origin' into pre/beta
2 parents 36875b6 + 720072c commit 6fd9f14

File tree

2 files changed

+62
-3
lines changed

2 files changed

+62
-3
lines changed
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
"""
2+
Basic example of a scraping pipeline using CodeGeneratorGraph with an output schema
3+
"""
4+
5+
import os, json
6+
from typing import List
7+
from dotenv import load_dotenv
8+
from pydantic import BaseModel, Field
9+
from scrapegraphai.graphs import CodeGeneratorGraph
10+
11+
load_dotenv()
12+
13+
# ************************************************
14+
# Define the output schema for the graph
15+
# ************************************************
16+
17+
class Project(BaseModel):
18+
title: str = Field(description="The title of the project")
19+
description: str = Field(description="The description of the project")
20+
21+
class Projects(BaseModel):
22+
projects: List[Project]
23+
24+
# ************************************************
25+
# Define the configuration for the graph
26+
# ************************************************
27+
28+
together_key = os.getenv("TOGETHER_KEY")
29+
30+
graph_config = {
31+
"llm": {
32+
"model": "togetherai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
33+
"api_key": together_key,
34+
},
35+
"verbose": True,
36+
"headless": False,
37+
"reduction": 2,
38+
"max_iterations": {
39+
"overall": 10,
40+
"syntax": 3,
41+
"execution": 3,
42+
"validation": 3,
43+
"semantic": 3
44+
},
45+
"output_file_name": "extracted_data.py"
46+
}
47+
48+
# ************************************************
49+
# Create the CodeGeneratorGraph instance and run it
50+
# ************************************************
51+
52+
code_generator_graph = CodeGeneratorGraph(
53+
prompt="List me all the projects with their description",
54+
source="https://perinim.github.io/projects/",
55+
schema=Projects,
56+
config=graph_config
57+
)
58+
59+
result = code_generator_graph.run()
60+
print(result)

pyproject.toml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,10 @@ dependencies = [
2929
"playwright>=1.43.0",
3030
"undetected-playwright>=0.3.0",
3131
"langchain-ollama>=0.1.3",
32-
"simpleeval>=1.0.0",
32+
"qdrant-client>=1.11.3",
33+
"fastembed>=0.3.6",
3334
"semchunk>=2.2.0",
3435
"transformers>=4.44.2",
35-
"qdrant-client>=1.11.3",
36-
"fastembed>=0.3.6",
3736
"googlesearch-python>=1.2.5"
3837
]
3938

0 commit comments

Comments
 (0)