Skip to content

Commit 04fdb5d

Browse files
Merge pull request #9 from VinciGit00/pre/beta
add possibility to save the code
2 parents ce841e2 + bcf02e5 commit 04fdb5d

File tree

3 files changed

+56
-9
lines changed

3 files changed

+56
-9
lines changed

examples/code_generation/simple_with_schema.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ class Projects(BaseModel):
4242
"validation": 3,
4343
"semantic": 3
4444
},
45+
"output_file_name": "extracted_data.py"
4546
}
4647

4748
# ************************************************

extract_data.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
def extract_data(html: str) -> dict:
    """Extract project titles and descriptions from an HTML page.

    Every ``<div class="grid-item">`` is treated as one project entry. Its
    title is read from the ``<h4 class="card-title">`` element and its
    description from the ``<p class="card-text">`` element.

    Args:
        html: Raw HTML markup to parse.

    Returns:
        A dict of the form
        ``{'projects': [{'title': str, 'description': str}, ...]}``.
        Entries missing either the title or the description element are
        skipped instead of aborting the whole extraction.
    """
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html, 'html.parser')

    projects = []
    for entry in soup.find_all('div', class_='grid-item'):
        title_tag = entry.find('h4', class_='card-title')
        description_tag = entry.find('p', class_='card-text')

        # find() returns None when nothing matches; calling get_text() on it
        # would raise AttributeError and lose every other project too.
        if title_tag is None or description_tag is None:
            continue

        projects.append({
            'title': title_tag.get_text(strip=True),
            'description': description_tag.get_text(strip=True),
        })

    return {'projects': projects}

scrapegraphai/graphs/code_generator_graph.py

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,17 @@
1717

1818
class CodeGeneratorGraph(AbstractGraph):
1919
"""
20-
CodeGeneratorGraph is a script generator pipeline that generates the function extract_data(html: str) -> dict() for
21-
extarcting the wanted informations from a HTML page. The code generated is in Python and uses the library BeautifulSoup.
22-
It requires a user prompt, a source URL, and a output schema.
23-
20+
CodeGeneratorGraph is a script generator pipeline that generates the function extract_data(html: str) -> dict() for
21+
extracting the wanted information from a HTML page. The code generated is in Python and uses the library BeautifulSoup.
22+
It requires a user prompt, a source URL, and an output schema.
23+
2424
Attributes:
2525
prompt (str): The prompt for the graph.
2626
source (str): The source of the graph.
2727
config (dict): Configuration parameters for the graph.
2828
schema (BaseModel): The schema for the graph output.
2929
llm_model: An instance of a language model client, configured for generating answers.
30-
embedder_model: An instance of an embedding model client,
30+
embedder_model: An instance of an embedding model client,
3131
configured for generating embeddings.
3232
verbose (bool): A flag indicating whether to show print statements during execution.
3333
headless (bool): A flag indicating whether to run the graph in headless mode.
@@ -96,7 +96,6 @@ def _create_graph(self) -> BaseGraph:
9696
"schema": self.schema,
9797
}
9898
)
99-
10099
prompt_refier_node = PromptRefinerNode(
101100
input="user_prompt",
102101
output=["refined_prompt"],
@@ -106,7 +105,6 @@ def _create_graph(self) -> BaseGraph:
106105
"schema": self.schema
107106
}
108107
)
109-
110108
html_analyzer_node = HtmlAnalyzerNode(
111109
input="refined_prompt & original_html",
112110
output=["html_info", "reduced_html"],
@@ -117,7 +115,6 @@ def _create_graph(self) -> BaseGraph:
117115
"reduction": self.config.get("reduction", 0)
118116
}
119117
)
120-
121118
generate_code_node = GenerateCodeNode(
122119
input="user_prompt & refined_prompt & html_info & reduced_html & answer",
123120
output=["generated_code"],
@@ -166,4 +163,26 @@ def run(self) -> str:
166163
inputs = {"user_prompt": self.prompt, self.input_key: self.source}
167164
self.final_state, self.execution_info = self.graph.execute(inputs)
168165

169-
return self.final_state.get("generated_code", "No code created.")
166+
generated_code = self.final_state.get("generated_code", "No code created.")
167+
168+
if self.config.get("filename") is None:
169+
filename = "extracted_data.py"
170+
elif ".py" not in self.config.get("filename"):
171+
filename += ".py"
172+
else:
173+
filename = self.config.get("filename")
174+
175+
self.save_code_to_file(generated_code, filename)
176+
177+
return generated_code
178+
179+
def save_code_to_file(self, code: str, filename: str) -> None:
    """
    Saves the generated code to a Python file.

    The file is created, or overwritten if it already exists, and is always
    written as UTF-8 so that non-ASCII characters in the generated code do
    not depend on the platform's default encoding.

    Args:
        code (str): The generated code to be saved.
        filename (str): Name (or path) of the output file.

    Raises:
        OSError: If the file cannot be opened or written.
    """
    with open(filename, "w", encoding="utf-8") as file:
        file.write(code)

0 commit comments

Comments
 (0)