2 changes: 1 addition & 1 deletion examples/openai/md_scraper_openai.py
@@ -37,7 +37,7 @@
 # ************************************************

 md_scraper_graph = DocumentScraperGraph(
-    prompt="List me all the authors, title and genres of the books",
+    prompt="List me all the projects",
     source=text,  # Pass the content of the file, not the file object
     config=graph_config
 )

1 change: 1 addition & 0 deletions pyproject.toml
@@ -60,6 +60,7 @@ keywords = [
     "web scraping tool",
     "webscraping",
     "graph",
+    "llm"
 ]
 classifiers = [
     "Intended Audience :: Developers",

2 changes: 1 addition & 1 deletion scrapegraphai/builders/graph_builder.py
@@ -120,7 +120,7 @@ def build_graph(self):
         Returns:
             dict: A JSON representation of the graph configuration.
         """
-        return self.chain.invoke(self.prompt)
+        return self.chain.ainvoke(self.prompt)

     @staticmethod
     def convert_json_to_graphviz(json_data, format: str = 'pdf'):

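Worth noting for readers following along: `build_graph` stays a plain `def` here, so after this hunk it returns the coroutine produced by `ainvoke` rather than the JSON dict itself. A minimal, self-contained sketch of how a caller would drive that result (the `FakeChain` stand-in and the prompt text are illustrative assumptions, not the library's API):

import asyncio

class FakeChain:
    """Stand-in for the LangChain runnable held by GraphBuilder (assumption)."""
    async def ainvoke(self, prompt: str) -> dict:
        return {"nodes": [], "edges": [], "prompt": prompt}

def build_graph(chain: FakeChain, prompt: str):
    # Mirrors the hunk above: a plain `def` returning ainvoke's coroutine.
    return chain.ainvoke(prompt)

async def main() -> None:
    # Awaiting the returned coroutine yields the dict; calling build_graph
    # without an event loop only hands back an un-run coroutine object.
    graph_json = await build_graph(FakeChain(), "scrape the project list")
    print(graph_json)

asyncio.run(main())
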
6 changes: 3 additions & 3 deletions scrapegraphai/nodes/generate_answer_csv_node.py
@@ -60,7 +60,7 @@ def __init__(

         self.additional_info = node_config.get("additional_info")

-    def execute(self, state):
+    async def execute(self, state):
         """
         Generates an answer by constructing a prompt from the user's input and the scraped
         content, querying the language model, and parsing its response.
@@ -126,7 +126,7 @@ def execute(self, state):
         )

         chain = prompt | self.llm_model | output_parser
-        answer = chain.invoke({"question": user_prompt})
+        answer = chain.ainvoke({"question": user_prompt})
         state.update({self.output[0]: answer})
         return state

@@ -157,7 +157,7 @@ def execute(self, state):
         )

         merge_chain = merge_prompt | self.llm_model | output_parser
-        answer = merge_chain.invoke({"context": batch_results, "question": user_prompt})
+        answer = await merge_chain.ainvoke({"context": batch_results, "question": user_prompt})

         state.update({self.output[0]: answer})
         return state

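The two hunks above differ in one detail: the merge path awaits `ainvoke`, while the single-chunk path assigns the bare call. Inside an `async def execute`, only the awaited form binds the parsed answer; the unawaited form binds a coroutine object to the state key. A small self-contained sketch of the awaited pattern (the `EchoChain` class and state keys are stand-ins, not ScrapeGraphAI's real classes):

import asyncio

class EchoChain:
    """Stand-in for `prompt | llm_model | output_parser` (assumption)."""
    async def ainvoke(self, inputs: dict) -> str:
        return f"answer to: {inputs['question']}"

async def execute(state: dict) -> dict:
    chain = EchoChain()
    # `await` resolves the coroutine to the parsed string; without it,
    # state["answer"] would hold `<coroutine ...>` instead of text.
    answer = await chain.ainvoke({"question": state["user_prompt"]})
    state.update({"answer": answer})
    return state

state = asyncio.run(execute({"user_prompt": "List me all the projects"}))
print(state["answer"])
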
56 changes: 33 additions & 23 deletions scrapegraphai/nodes/generate_answer_node.py
@@ -1,5 +1,5 @@
 """
-generate_answer_node module
+GenerateAnswerNode Module
 """
 from typing import List, Optional
 from langchain.prompts import PromptTemplate
@@ -19,24 +19,24 @@

 class GenerateAnswerNode(BaseNode):
     """
-Initializes the GenerateAnswerNode class.
-
-Args:
-    input (str): The input data type for the node.
-    output (List[str]): The output data type(s) for the node.
-    node_config (Optional[dict]): Configuration dictionary for the node,
-        which includes the LLM model, verbosity, schema, and other settings.
-        Defaults to None.
-    node_name (str): The name of the node. Defaults to "GenerateAnswer".
-
-Attributes:
-    llm_model: The language model specified in the node configuration.
-    verbose (bool): Whether verbose mode is enabled.
-    force (bool): Whether to force certain behaviors, overriding defaults.
-    script_creator (bool): Whether the node is in script creation mode.
-    is_md_scraper (bool): Whether the node is scraping markdown data.
-    additional_info (Optional[str]): Any additional information to be
-        included in the prompt templates.
+    Initializes the GenerateAnswerNode class.
+
+    Args:
+        input (str): The input data type for the node.
+        output (List[str]): The output data type(s) for the node.
+        node_config (Optional[dict]): Configuration dictionary for the node,
+            which includes the LLM model, verbosity, schema, and other settings.
+            Defaults to None.
+        node_name (str): The name of the node. Defaults to "GenerateAnswer".
+
+    Attributes:
+        llm_model: The language model specified in the node configuration.
+        verbose (bool): Whether verbose mode is enabled.
+        force (bool): Whether to force certain behaviors, overriding defaults.
+        script_creator (bool): Whether the node is in script creation mode.
+        is_md_scraper (bool): Whether the node is scraping markdown data.
+        additional_info (Optional[str]): Any additional information to be
+            included in the prompt templates.
     """
     def __init__(
         self,
@@ -57,7 +57,17 @@ def __init__(
         self.is_md_scraper = node_config.get("is_md_scraper", False)
         self.additional_info = node_config.get("additional_info")

-    def execute(self, state: dict) -> dict:
+    async def execute(self, state: dict) -> dict:
+        """
+        Executes the GenerateAnswerNode.
+
+        Args:
+            state (dict): The current state of the graph. The input keys will be used
+                to fetch the correct data from the state.
+
+        Returns:
+            dict: The updated state with the output key containing the generated answer.
+        """
         self.logger.info(f"--- Executing {self.node_name} Node ---")

         input_keys = self.get_input_keys(state)
@@ -113,7 +123,7 @@ def execute(self, state: dict) -> dict:
         chain = prompt | self.llm_model
         if output_parser:
             chain = chain | output_parser
-        answer = chain.invoke({"question": user_prompt})
+        answer = await chain.ainvoke({"question": user_prompt})

         state.update({self.output[0]: answer})
         return state
@@ -133,7 +143,7 @@ def execute(self, state: dict) -> dict:
             chains_dict[chain_name] = chains_dict[chain_name] | output_parser

         async_runner = RunnableParallel(**chains_dict)
-        batch_results = async_runner.invoke({"question": user_prompt})
+        batch_results = await async_runner.ainvoke({"question": user_prompt})

         merge_prompt = PromptTemplate(
             template=template_merge_prompt,
@@ -144,7 +154,7 @@ def execute(self, state: dict) -> dict:
         merge_chain = merge_prompt | self.llm_model
         if output_parser:
             merge_chain = merge_chain | output_parser
-        answer = merge_chain.invoke({"context": batch_results, "question": user_prompt})
+        answer = await merge_chain.ainvoke({"context": batch_results, "question": user_prompt})

         state.update({self.output[0]: answer})
         return state

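These hunks cover the node's map-reduce path: one chain per document chunk run through `RunnableParallel`, then a merge chain over the batch results, all now awaited. A self-contained sketch of that shape (the `StubChain` class and chunk names are assumptions; `asyncio.gather` stands in for RunnableParallel's scheduling):

import asyncio

class StubChain:
    """Stand-in for `prompt | llm_model | output_parser` (assumption)."""
    def __init__(self, tag: str) -> None:
        self.tag = tag

    async def ainvoke(self, inputs: dict) -> str:
        return f"{self.tag} -> {inputs['question']}"

async def map_reduce_answer(question: str, n_chunks: int = 3) -> str:
    # Fan out: one chain per chunk, run concurrently (the role played by
    # RunnableParallel(**chains_dict) in the hunk above).
    chains = {f"chunk{i}": StubChain(f"chunk{i}") for i in range(n_chunks)}
    results = await asyncio.gather(
        *(chain.ainvoke({"question": question}) for chain in chains.values())
    )
    batch_results = dict(zip(chains, results))
    # Reduce: the merge chain sees all chunk answers as context.
    merge = StubChain("merge")
    return await merge.ainvoke({"question": f"{question} | {batch_results}"})

print(asyncio.run(map_reduce_answer("List me all the projects")))
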
2 changes: 1 addition & 1 deletion scrapegraphai/nodes/generate_answer_node_k_level.py
@@ -143,7 +143,7 @@ def execute(self, state: dict) -> dict:
         merge_chain = merge_prompt | self.llm_model
         if output_parser:
             merge_chain = merge_chain | output_parser
-        answer = merge_chain.invoke({"context": batch_results, "question": user_prompt})
+        answer = merge_chain.ainvoke({"context": batch_results, "question": user_prompt})

         state["answer"] = answer

4 changes: 2 additions & 2 deletions scrapegraphai/nodes/generate_answer_omni_node.py
@@ -121,7 +121,7 @@ def execute(self, state: dict) -> dict:
         )

         chain = prompt | self.llm_model | output_parser
-        answer = chain.invoke({"question": user_prompt})
+        answer = chain.ainvoke({"question": user_prompt})

         state.update({self.output[0]: answer})
         return state
@@ -154,7 +154,7 @@ def execute(self, state: dict) -> dict:
         )

         merge_chain = merge_prompt | self.llm_model | output_parser
-        answer = merge_chain.invoke({"context": batch_results, "question": user_prompt})
+        answer = merge_chain.ainvoke({"context": batch_results, "question": user_prompt})

         state.update({self.output[0]: answer})
         return state

4 changes: 2 additions & 2 deletions scrapegraphai/nodes/generate_answer_pdf_node.py
@@ -128,7 +128,7 @@ def execute(self, state):
             },
         )
         chain = prompt | self.llm_model | output_parser
-        answer = chain.invoke({"question": user_prompt})
+        answer = chain.ainvoke({"question": user_prompt})


         state.update({self.output[0]: answer})
@@ -162,7 +162,7 @@ def execute(self, state):
         )

         merge_chain = merge_prompt | self.llm_model | output_parser
-        answer = merge_chain.invoke({"context": batch_results, "question": user_prompt})
+        answer = merge_chain.ainvoke({"context": batch_results, "question": user_prompt})

         state.update({self.output[0]: answer})
         return state

4 changes: 2 additions & 2 deletions scrapegraphai/nodes/generate_code_node.py
@@ -325,7 +325,7 @@ def generate_initial_code(self, state: dict) -> str:
         output_parser = StrOutputParser()

         chain = prompt | self.llm_model | output_parser
-        generated_code = chain.invoke({})
+        generated_code = chain.ainvoke({})
         return generated_code

     def semantic_comparison(self, generated_result: Any, reference_result: Any) -> Dict[str, Any]:
@@ -368,7 +368,7 @@ def semantic_comparison(self, generated_result: Any, reference_result: Any) -> Dict[str, Any]:
         )

         chain = prompt | self.llm_model | output_parser
-        return chain.invoke({
+        return chain.ainvoke({
             "generated_result": json.dumps(generated_result, indent=2),
             "reference_result": json.dumps(reference_result_dict, indent=2)
         })

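A typing note on these hunks: once a plain `def` returns the coroutine from `ainvoke`, annotations like `-> str` or `-> Dict[str, Any]` describe the awaited value rather than what the function actually returns. A short sketch of an annotation matching the new behavior (names here are illustrative, not the node's real API):

from typing import Any, Awaitable, Dict

async def _ainvoke(payload: Dict[str, Any]) -> Dict[str, Any]:
    return {"echo": payload}

def semantic_comparison(payload: Dict[str, Any]) -> Awaitable[Dict[str, Any]]:
    # Returning the coroutine un-awaited means the caller receives an
    # awaitable, so the annotation names Awaitable[...], not Dict[...].
    return _ainvoke(payload)
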
2 changes: 1 addition & 1 deletion scrapegraphai/nodes/generate_scraper_node.py
@@ -131,7 +131,7 @@ def execute(self, state: dict) -> dict:
         )
         map_chain = prompt | self.llm_model | StrOutputParser()

-        answer = map_chain.invoke({"question": user_prompt})
+        answer = map_chain.ainvoke({"question": user_prompt})

         state.update({self.output[0]: answer})
         return state

2 changes: 1 addition & 1 deletion scrapegraphai/nodes/html_analyzer_node.py
@@ -93,7 +93,7 @@ def execute(self, state: dict) -> dict:
         output_parser = StrOutputParser()

         chain = prompt | self.llm_model | output_parser
-        html_analysis = chain.invoke({})
+        html_analysis = chain.ainvoke({})

         state.update({self.output[0]: html_analysis, self.output[1]: reduced_html})
         return state

2 changes: 1 addition & 1 deletion scrapegraphai/nodes/merge_answers_node.py
@@ -95,7 +95,7 @@ def execute(self, state: dict) -> dict:
         )

         merge_chain = prompt_template | self.llm_model | output_parser
-        answer = merge_chain.invoke({"user_prompt": user_prompt})
+        answer = merge_chain.ainvoke({"user_prompt": user_prompt})
         answer["sources"] = state.get("urls", [])

         state.update({self.output[0]: answer})

2 changes: 1 addition & 1 deletion scrapegraphai/nodes/merge_generated_scripts_node.py
@@ -74,7 +74,7 @@ def execute(self, state: dict) -> dict:
         )

         merge_chain = prompt_template | self.llm_model | StrOutputParser()
-        answer = merge_chain.invoke({"user_prompt": user_prompt})
+        answer = merge_chain.ainvoke({"user_prompt": user_prompt})

         state.update({self.output[0]: answer})
         return state

7 changes: 5 additions & 2 deletions scrapegraphai/nodes/parse_node.py
@@ -85,10 +85,13 @@ def execute(self, state: dict) -> dict:
         else:
             docs_transformed = docs_transformed[0]

-        link_urls, img_urls = self._extract_urls(docs_transformed.page_content, source)
+        try:
+            link_urls, img_urls = self._extract_urls(docs_transformed.page_content, source)
+        except Exception as e:
+            link_urls, img_urls = "", ""

         chunk_size = self.chunk_size
-        chunk_size = min(chunk_size - 500, int(chunk_size * 0.75))
+        chunk_size = min(chunk_size - 500, int(chunk_size * 0.8))

         if isinstance(docs_transformed, Document):
             chunks = split_text_into_chunks(text=docs_transformed.page_content,

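The second change in this hunk loosens the effective chunk size: the cap moves from 75% to 80% of the configured window, while the flat 500-token margin stays. A quick sketch of the arithmetic (the function name is ours; the formula is straight from the hunk):

def effective_chunk_size(chunk_size: int) -> int:
    # Formula from the hunk: keep whichever safety margin is tighter,
    # a flat 500 tokens or 20% of the window.
    return min(chunk_size - 500, int(chunk_size * 0.8))

# Above 2500 tokens the 0.8 factor is the binding constraint;
# below 2500 the flat 500-token margin wins.
for size in (2048, 2500, 4096, 8192):
    print(size, "->", effective_chunk_size(size))
# 2048 -> 1548, 2500 -> 2000, 4096 -> 3276, 8192 -> 6553
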
2 changes: 1 addition & 1 deletion scrapegraphai/nodes/prompt_refiner_node.py
@@ -96,7 +96,7 @@ def execute(self, state: dict) -> dict:
         output_parser = StrOutputParser()

         chain = prompt | self.llm_model | output_parser
-        refined_prompt = chain.invoke({})
+        refined_prompt = chain.ainvoke({})

         state.update({self.output[0]: refined_prompt})
         return state

2 changes: 1 addition & 1 deletion scrapegraphai/nodes/reasoning_node.py
@@ -91,7 +91,7 @@ def execute(self, state: dict) -> dict:
         output_parser = StrOutputParser()

         chain = prompt | self.llm_model | output_parser
-        refined_prompt = chain.invoke({})
+        refined_prompt = chain.ainvoke({})

         state.update({self.output[0]: refined_prompt})
         return state

2 changes: 1 addition & 1 deletion scrapegraphai/nodes/robots_node.py
@@ -108,7 +108,7 @@ def execute(self, state: dict) -> dict:
         )

         chain = prompt | self.llm_model | output_parser
-        is_scrapable = chain.invoke({"path": source})[0]
+        is_scrapable = chain.ainvoke({"path": source})[0]

         if "no" in is_scrapable:
             self.logger.warning(

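One detail specific to this hunk: the result is subscripted immediately. That only works once the coroutine has been resolved, so the subscript has to follow an await. A self-contained sketch of the awaited form (the `ListChain` stand-in and its yes/no output are assumptions):

import asyncio

class ListChain:
    """Stand-in for a chain whose parsed output is a list (assumption)."""
    async def ainvoke(self, inputs: dict) -> list:
        return ["no" if "disallow" in inputs["path"] else "yes"]

async def check_scrapable(path: str) -> str:
    # Subscript after awaiting: a bare chain.ainvoke(...)[0] raises
    # TypeError because coroutine objects are not subscriptable.
    is_scrapable = (await ListChain().ainvoke({"path": path}))[0]
    return is_scrapable

print(asyncio.run(check_scrapable("https://example.com/disallowed")))
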
2 changes: 1 addition & 1 deletion scrapegraphai/nodes/search_link_node.py
@@ -142,7 +142,7 @@ def execute(self, state: dict) -> dict:
             input_variables=["content", "user_prompt"],
         )
         merge_chain = merge_prompt | self.llm_model | output_parser
-        answer = merge_chain.invoke(
+        answer = merge_chain.ainvoke(
             {"content": chunk.page_content}
         )
         relevant_links += answer

8 changes: 4 additions & 4 deletions scrapegraphai/utils/code_error_analysis.py
@@ -31,7 +31,7 @@ def syntax_focused_analysis(state: dict, llm_model) -> str:
     prompt = PromptTemplate(template=TEMPLATE_SYNTAX_ANALYSIS,
                             input_variables=["generated_code", "errors"])
     chain = prompt | llm_model | StrOutputParser()
-    return chain.invoke({
+    return chain.ainvoke({
         "generated_code": state["generated_code"],
         "errors": state["errors"]["syntax"]
     })
@@ -51,7 +51,7 @@ def execution_focused_analysis(state: dict, llm_model) -> str:
                             input_variables=["generated_code", "errors",
                                              "html_code", "html_analysis"])
     chain = prompt | llm_model | StrOutputParser()
-    return chain.invoke({
+    return chain.ainvoke({
         "generated_code": state["generated_code"],
         "errors": state["errors"]["execution"],
         "html_code": state["html_code"],
@@ -73,7 +73,7 @@ def validation_focused_analysis(state: dict, llm_model) -> str:
                             input_variables=["generated_code", "errors",
                                              "json_schema", "execution_result"])
     chain = prompt | llm_model | StrOutputParser()
-    return chain.invoke({
+    return chain.ainvoke({
         "generated_code": state["generated_code"],
         "errors": state["errors"]["validation"],
         "json_schema": state["json_schema"],
@@ -97,7 +97,7 @@ def semantic_focused_analysis(state: dict, comparison_result: Dict[str, Any], llm_model) -> str:
                             input_variables=["generated_code",
                                              "differences", "explanation"])
     chain = prompt | llm_model | StrOutputParser()
-    return chain.invoke({
+    return chain.ainvoke({
         "generated_code": state["generated_code"],
         "differences": json.dumps(comparison_result["differences"], indent=2),
         "explanation": comparison_result["explanation"]

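Since each helper now hands back a coroutine, independent analysis passes can be overlapped rather than run back to back. A self-contained illustration of that payoff (the `fake_analysis` stub stands in for the real helpers, which would also need an `llm_model`):

import asyncio

async def fake_analysis(kind: str, state: dict) -> str:
    """Stand-in for the analysis helpers above (assumption)."""
    await asyncio.sleep(0.1)  # simulates one LLM round trip
    return f"{kind} analysis of {state['generated_code']!r}"

async def analyze_all(state: dict) -> dict:
    kinds = ("syntax", "execution", "validation")
    # Three passes overlap in ~0.1s total instead of ~0.3s sequentially.
    results = await asyncio.gather(*(fake_analysis(k, state) for k in kinds))
    return dict(zip(kinds, results))

print(asyncio.run(analyze_all({"generated_code": "print('hi')"})))
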
8 changes: 4 additions & 4 deletions scrapegraphai/utils/code_error_correction.py
@@ -33,7 +33,7 @@ def syntax_focused_code_generation(state: dict, analysis: str, llm_model) -> str:
     prompt = PromptTemplate(template=TEMPLATE_SYNTAX_CODE_GENERATION,
                             input_variables=["analysis", "generated_code"])
     chain = prompt | llm_model | StrOutputParser()
-    return chain.invoke({
+    return chain.ainvoke({
         "analysis": analysis,
         "generated_code": state["generated_code"]
     })
@@ -53,7 +53,7 @@ def execution_focused_code_generation(state: dict, analysis: str, llm_model) -> str:
     prompt = PromptTemplate(template=TEMPLATE_EXECUTION_CODE_GENERATION,
                             input_variables=["analysis", "generated_code"])
     chain = prompt | llm_model | StrOutputParser()
-    return chain.invoke({
+    return chain.ainvoke({
         "analysis": analysis,
         "generated_code": state["generated_code"]
     })
@@ -73,7 +73,7 @@ def validation_focused_code_generation(state: dict, analysis: str, llm_model) -> str:
     prompt = PromptTemplate(template=TEMPLATE_VALIDATION_CODE_GENERATION,
                             input_variables=["analysis", "generated_code", "json_schema"])
     chain = prompt | llm_model | StrOutputParser()
-    return chain.invoke({
+    return chain.ainvoke({
         "analysis": analysis,
         "generated_code": state["generated_code"],
         "json_schema": state["json_schema"]
@@ -94,7 +94,7 @@ def semantic_focused_code_generation(state: dict, analysis: str, llm_model) -> str:
     prompt = PromptTemplate(template=TEMPLATE_SEMANTIC_CODE_GENERATION,
                             input_variables=["analysis", "generated_code", "generated_result", "reference_result"])
     chain = prompt | llm_model | StrOutputParser()
-    return chain.invoke({
+    return chain.ainvoke({
         "analysis": analysis,
         "generated_code": state["generated_code"],
         "generated_result": json.dumps(state["execution_result"], indent=2),