Commit 26de5dd

Merge branch 'pre/beta' into ligthweigthing_library
2 parents 986c8a1 + f7ba1f3 commit 26de5dd

20 files changed: 40 additions, 104 deletions

scrapegraphai/graphs/abstract_graph.py

Lines changed: 1 addition & 0 deletions

@@ -149,6 +149,7 @@ def handle_model(model_name, provider, token_key, default_token=8192):
             "ollama", "oneapi", "nvidia", "groq", "google_vertexai",
             "bedrock", "mistralai", "hugging_face", "deepseek", "ernie", "fireworks"]

+
         if llm_params["model"].split("/")[0] not in known_models and llm_params["model"].split("-")[0] not in known_models:
             raise ValueError(f"Model '{llm_params['model']}' is not supported")
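
For context on the hunk above: the guard treats the text before the first "/" or "-" as a provider prefix, and a model passes if either prefix appears in known_models. A minimal sketch of that check in plain Python (the model strings and the abbreviated list are illustrative):

known_models = ["ollama", "groq", "bedrock", "mistralai"]  # abbreviated

for model in ["ollama/llama3", "groq-mixtral", "acme/foo"]:
    supported = (model.split("/")[0] in known_models
                 or model.split("-")[0] in known_models)
    print(model, "->", "supported" if supported else "not supported")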

scrapegraphai/nodes/generate_answer_csv_node.py

Lines changed: 12 additions & 11 deletions

@@ -9,7 +9,8 @@
 from tqdm import tqdm
 from ..utils.logging import get_logger
 from .base_node import BaseNode
-from ..prompts.generate_answer_node_csv_prompts import TEMPLATE_CHUKS_CSV, TEMPLATE_NO_CHUKS_CSV, TEMPLATE_MERGE_CSV
+from ..prompts.generate_answer_node_csv_prompts import (TEMPLATE_CHUKS_CSV,
+                                                        TEMPLATE_NO_CHUKS_CSV, TEMPLATE_MERGE_CSV)

 class GenerateAnswerCSVNode(BaseNode):
     """
@@ -95,22 +96,22 @@ def execute(self, state):
         else:
             output_parser = JsonOutputParser()

-        TEMPLATE_NO_CHUKS_CSV_prompt = TEMPLATE_NO_CHUKS_CSV
-        TEMPLATE_CHUKS_CSV_prompt = TEMPLATE_CHUKS_CSV
-        TEMPLATE_MERGE_CSV_prompt = TEMPLATE_MERGE_CSV
+        TEMPLATE_NO_CHUKS_CSV_PROMPT = TEMPLATE_NO_CHUKS_CSV
+        TEMPLATE_CHUKS_CSV_PROMPT = TEMPLATE_CHUKS_CSV
+        TEMPLATE_MERGE_CSV_PROMPT = TEMPLATE_MERGE_CSV

         if self.additional_info is not None:
-            TEMPLATE_NO_CHUKS_CSV_prompt = self.additional_info + TEMPLATE_NO_CHUKS_CSV
-            TEMPLATE_CHUKS_CSV_prompt = self.additional_info + TEMPLATE_CHUKS_CSV
-            TEMPLATE_MERGE_CSV_prompt = self.additional_info + TEMPLATE_MERGE_CSV
+            TEMPLATE_NO_CHUKS_CSV_PROMPT = self.additional_info + TEMPLATE_NO_CHUKS_CSV
+            TEMPLATE_CHUKS_CSV_PROMPT = self.additional_info + TEMPLATE_CHUKS_CSV
+            TEMPLATE_MERGE_CSV_PROMPT = self.additional_info + TEMPLATE_MERGE_CSV

         format_instructions = output_parser.get_format_instructions()

         chains_dict = {}

         if len(doc) == 1:
             prompt = PromptTemplate(
-                template=TEMPLATE_NO_CHUKS_CSV_prompt,
+                template=TEMPLATE_NO_CHUKS_CSV_PROMPT,
                 input_variables=["question"],
                 partial_variables={
                     "context": doc,
@@ -127,7 +128,7 @@ def execute(self, state):
             tqdm(doc, desc="Processing chunks", disable=not self.verbose)
         ):
             prompt = PromptTemplate(
-                template=TEMPLATE_CHUKS_CSV_prompt,
+                template=TEMPLATE_CHUKS_CSV_PROMPT,
                 input_variables=["question"],
                 partial_variables={
                     "context": chunk,
@@ -144,7 +145,7 @@ def execute(self, state):
         batch_results = async_runner.invoke({"question": user_prompt})

         merge_prompt = PromptTemplate(
-            template = TEMPLATE_MERGE_CSV_prompt,
+            template = TEMPLATE_MERGE_CSV_PROMPT,
             input_variables=["context", "question"],
             partial_variables={"format_instructions": format_instructions},
         )
@@ -153,4 +154,4 @@ def execute(self, state):
         answer = merge_chain.invoke({"context": batch_results, "question": user_prompt})

         state.update({self.output[0]: answer})
-        return state
+        return state
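
A note on the PromptTemplate calls whose template constants this file renames: partial_variables binds values such as context and format_instructions at construction time, while input_variables stay runtime inputs. A minimal sketch, assuming langchain_core is installed (template text and values are illustrative):

from langchain_core.prompts import PromptTemplate

prompt = PromptTemplate(
    template="Context: {context}\n{format_instructions}\nQuestion: {question}",
    input_variables=["question"],
    partial_variables={"context": "csv rows here",
                       "format_instructions": "Reply as JSON."},
)
print(prompt.format(question="Which product is cheapest?"))  # partials pre-filled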

scrapegraphai/nodes/generate_scraper_node.py

Lines changed: 0 additions & 2 deletions

@@ -67,10 +67,8 @@ def execute(self, state: dict) -> dict:

         self.logger.info(f"--- Executing {self.node_name} Node ---")

-        # Interpret input keys based on the provided input expression
         input_keys = self.get_input_keys(state)

-        # Fetching data from the state based on the input keys
         input_data = [state[key] for key in input_keys]

         user_prompt = input_data[0]
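
The two deleted comments described the state-access idiom that opens most execute() methods: resolve the node's input keys, then index the shared state with them. A plain-Python sketch with hypothetical keys:

state = {"user_prompt": "Extract the page title", "doc": ["<html>...</html>"]}
input_keys = ["user_prompt", "doc"]  # stand-in for self.get_input_keys(state)
input_data = [state[key] for key in input_keys]
user_prompt = input_data[0]  # the first key's value drives the prompt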

scrapegraphai/nodes/get_probable_tags_node.py

Lines changed: 0 additions & 4 deletions

@@ -58,10 +58,8 @@ def execute(self, state: dict) -> dict:

         self.logger.info(f"--- Executing {self.node_name} Node ---")

-        # Interpret input keys based on the provided input expression
         input_keys = self.get_input_keys(state)

-        # Fetching data from the state based on the input keys
         input_data = [state[key] for key in input_keys]

         user_prompt = input_data[0]
@@ -88,10 +86,8 @@ def execute(self, state: dict) -> dict:
             },
         )

-        # Execute the chain to get probable tags
         tag_answer = tag_prompt | self.llm_model | output_parser
         probable_tags = tag_answer.invoke({"question": user_prompt})

-        # Update the dictionary with probable tags
         state.update({self.output[0]: probable_tags})
         return state
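
The kept line tag_prompt | self.llm_model | output_parser is LangChain pipe composition: the prompt's output feeds the model, whose output feeds the parser. A runnable sketch with a canned fake model, assuming langchain_core and langchain_community are installed (prompt text and response are illustrative):

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_community.llms.fake import FakeListLLM

tag_prompt = PromptTemplate(template="Which HTML tags hold: {question}?",
                            input_variables=["question"])
llm = FakeListLLM(responses=["h1, title"])  # canned stand-in for a real model
chain = tag_prompt | llm | StrOutputParser()
print(chain.invoke({"question": "page headings"}))  # -> h1, title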

scrapegraphai/nodes/graph_iterator_node.py

Lines changed: 0 additions & 3 deletions

@@ -103,7 +103,6 @@ async def _async_execute(self, state: dict, batchsize: int) -> dict:
         if graph_instance is None:
             raise ValueError("graph instance is required for concurrent execution")

-        # Assign depth level to the graph
         if "graph_depth" in graph_instance.config:
             graph_instance.config["graph_depth"] += 1
         else:
@@ -113,14 +112,12 @@ async def _async_execute(self, state: dict, batchsize: int) -> dict:

         participants = []

-        # semaphore to limit the number of concurrent tasks
         semaphore = asyncio.Semaphore(batchsize)

         async def _async_run(graph):
             async with semaphore:
                 return await asyncio.to_thread(graph.run)

-        # creates a deepcopy of the graph instance for each endpoint
         for url in urls:
             instance = copy.copy(graph_instance)
             instance.source = url
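
The kept lines around the removed comments form a bounded-concurrency pattern: a semaphore caps in-flight work while asyncio.to_thread moves each blocking graph.run() off the event loop. A stdlib-only sketch (work() is a hypothetical stand-in for graph.run()):

import asyncio
import time

async def main(urls, batchsize=2):
    semaphore = asyncio.Semaphore(batchsize)

    def work(url):  # stand-in for a blocking graph.run()
        time.sleep(0.1)
        return url

    async def run_one(url):
        async with semaphore:  # at most batchsize threads in flight
            return await asyncio.to_thread(work, url)

    return await asyncio.gather(*(run_one(u) for u in urls))

print(asyncio.run(main([f"https://example.com/{i}" for i in range(5)])))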

scrapegraphai/nodes/merge_answers_node.py

Lines changed: 0 additions & 5 deletions

@@ -56,21 +56,17 @@ def execute(self, state: dict) -> dict:

         self.logger.info(f"--- Executing {self.node_name} Node ---")

-        # Interpret input keys based on the provided input expression
         input_keys = self.get_input_keys(state)

-        # Fetching data from the state based on the input keys
         input_data = [state[key] for key in input_keys]

         user_prompt = input_data[0]
         answers = input_data[1]

-        # merge the answers in one string
         answers_str = ""
         for i, answer in enumerate(answers):
             answers_str += f"CONTENT WEBSITE {i+1}: {answer}\n"

-        # Initialize the output parser
         if self.node_config.get("schema", None) is not None:
             output_parser = JsonOutputParser(pydantic_object=self.node_config["schema"])
         else:
@@ -90,6 +86,5 @@ def execute(self, state: dict) -> dict:
         merge_chain = prompt_template | self.llm_model | output_parser
         answer = merge_chain.invoke({"user_prompt": user_prompt})

-        # Update the state with the generated answer
         state.update({self.output[0]: answer})
         return state
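
On the schema branch kept above: JsonOutputParser(pydantic_object=...) derives its format instructions from a Pydantic model, which is what get_format_instructions() later injects into the prompt. A minimal sketch, assuming langchain_core and pydantic are installed (the Product schema is hypothetical):

from langchain_core.output_parsers import JsonOutputParser
from pydantic import BaseModel

class Product(BaseModel):
    name: str
    price: float

parser = JsonOutputParser(pydantic_object=Product)
print(parser.get_format_instructions())  # JSON-schema text for the prompt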

scrapegraphai/nodes/parse_node.py

Lines changed: 1 addition & 4 deletions

@@ -59,13 +59,11 @@ def execute(self, state: dict) -> dict:

         self.logger.info(f"--- Executing {self.node_name} Node ---")

-        # Interpret input keys based on the provided input expression
         input_keys = self.get_input_keys(state)

-        # Fetching data from the state based on the input keys
         input_data = [state[key] for key in input_keys]
-        # Parse the document
         docs_transformed = input_data[0]
+
         if self.parse_html:
             docs_transformed = Html2TextTransformer().transform_documents(input_data[0])
             docs_transformed = docs_transformed[0]
@@ -77,7 +75,6 @@ def execute(self, state: dict) -> dict:
         else:
             docs_transformed = docs_transformed[0]

-        # Adapt the chunk size, leaving room for the reply, the prompt and the schema
         chunk_size = self.node_config.get("chunk_size", 4096)
         chunk_size = min(chunk_size - 500, int(chunk_size * 0.9))
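
The two kept lines implement the reserve the deleted comment described: the effective chunk is the smaller of a flat 500-token cut and a 10% margin, so the flat cut dominates below 5000 tokens and the percentage above. A worked example:

for chunk_size in (1024, 4096, 8192):
    effective = min(chunk_size - 500, int(chunk_size * 0.9))
    print(chunk_size, "->", effective)
# 1024 -> 524, 4096 -> 3596, 8192 -> 7372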

scrapegraphai/nodes/rag_node.py

Lines changed: 1 addition & 11 deletions

@@ -80,10 +80,8 @@ def execute(self, state: dict) -> dict:

         self.logger.info(f"--- Executing {self.node_name} Node ---")

-        # Interpret input keys based on the provided input expression
         input_keys = self.get_input_keys(state)

-        # Fetching data from the state based on the input keys
         input_data = [state[key] for key in input_keys]

         user_prompt = input_data[0]
@@ -102,7 +100,6 @@ def execute(self, state: dict) -> dict:

         self.logger.info("--- (updated chunks metadata) ---")

-        # check if embedder_model is provided, if not use llm_model
         if self.embedder_model is not None:
             embeddings = self.embedder_model
         elif 'embeddings' in self.node_config:
@@ -144,23 +141,17 @@ def execute(self, state: dict) -> dict:
             pipeline_compressor = DocumentCompressorPipeline(
                 transformers=[redundant_filter, relevant_filter]
             )
-            # redundant + relevant filter compressor
             compression_retriever = ContextualCompressionRetriever(
                 base_compressor=pipeline_compressor, base_retriever=retriever
             )

-            # relevant filter compressor only
-            # compression_retriever = ContextualCompressionRetriever(
-            #     base_compressor=relevant_filter, base_retriever=retriever
-            # )
-
             compressed_docs = compression_retriever.invoke(user_prompt)

         self.logger.info("--- (tokens compressed and vector stored) ---")

         state.update({self.output[0]: compressed_docs})
         return state
-
+

     def _create_default_embedder(self, llm_config=None) -> object:
         """
@@ -223,7 +214,6 @@ def _create_embedder(self, embedder_config: dict) -> object:
         embedder_params = {**embedder_config}
         if "model_instance" in embedder_config:
             return embedder_params["model_instance"]
-        # Instantiate the embedding model based on the model name
         if "openai" in embedder_params["model"]:
             return OpenAIEmbeddings(api_key=embedder_params["api_key"])
         if "azure" in embedder_params["model"]:

scrapegraphai/nodes/robots_node.py

Lines changed: 0 additions & 2 deletions

@@ -75,10 +75,8 @@ def execute(self, state: dict) -> dict:

         self.logger.info(f"--- Executing {self.node_name} Node ---")

-        # Interpret input keys based on the provided input expression
         input_keys = self.get_input_keys(state)

-        # Fetching data from the state based on the input keys
         input_data = [state[key] for key in input_keys]

         source = input_data[0]

scrapegraphai/nodes/search_internet_node.py

Lines changed: 1 addition & 6 deletions

@@ -67,7 +67,6 @@ def execute(self, state: dict) -> dict:

         input_keys = self.get_input_keys(state)

-        # Fetching data from the state based on the input keys
         input_data = [state[key] for key in input_keys]

         user_prompt = input_data[0]
@@ -79,10 +78,8 @@ def execute(self, state: dict) -> dict:
             input_variables=["user_prompt"],
         )

-        # Execute the chain to get the search query
         search_answer = search_prompt | self.llm_model | output_parser
-
-        # Ollama: Use no json format when creating the search query
+
         if isinstance(self.llm_model, ChatOllama) and self.llm_model.format == 'json':
             self.llm_model.format = None
         search_query = search_answer.invoke({"user_prompt": user_prompt})[0]
@@ -96,9 +93,7 @@ def execute(self, state: dict) -> dict:
                                   search_engine=self.search_engine)

         if len(answer) == 0:
-            # raise an exception if no answer is found
             raise ValueError("Zero results found for the search query.")

-        # Update the state with the generated answer
         state.update({self.output[0]: answer})
         return state
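
The kept conditional clears Ollama's JSON mode before generating a free-form search query, since JSON-constrained output suits structured answers but not query text. A minimal sketch, assuming langchain_community's ChatOllama (constructing the client does not contact a server; the model name is illustrative):

from langchain_community.chat_models import ChatOllama

llm = ChatOllama(model="llama3", format="json")
if isinstance(llm, ChatOllama) and llm.format == "json":
    llm.format = None  # free-form text suits query generation
print(llm.format)  # -> None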
