88
99# Imports from Langchain
1010from langchain .prompts import PromptTemplate
11- from langchain_core .output_parsers import JsonOutputParser
11+ from langchain_core .output_parsers import JsonOutputParser , StrOutputParser
1212from tqdm import tqdm
1313
1414from ..utils .logging import get_logger
@@ -35,7 +35,7 @@ def __init__(
3535 input : str ,
3636 output : List [str ],
3737 node_config : Optional [dict ] = None ,
38- node_name : str = "MergeAnswers " ,
38+ node_name : str = "MergeGeneratedScripts " ,
3939 ):
4040 super ().__init__ (node_name , "node" , input , output , 2 , node_config )
4141
@@ -66,15 +66,50 @@ def execute(self, state: dict) -> dict:
6666 # Fetching data from the state based on the input keys
6767 input_data = [state [key ] for key in input_keys ]
6868
69+ user_prompt = input_data [0 ]
6970 scripts = input_data [1 ]
7071
71- # merge the answers in one string
72- for i , script_str in enumerate (scripts ):
73- print (f"Script #{ i } " )
74- print ("=" * 40 )
75- print (script_str )
76- print ("-" * 40 )
72+ # merge the scripts into one string
73+ scripts_str = ""
74+ for i , script in enumerate (scripts ):
75+ scripts_str += "-----------------------------------\n "
76+ scripts_str += f"SCRIPT URL { i + 1 } \n "
77+ scripts_str += "-----------------------------------\n "
78+ scripts_str += script
79+
80+ # TODO: decide whether the schema should still be passed to the output parser, given that the generated scripts already implement it
81+
82+ # schema to be used for output parsing
83+ # if self.node_config.get("schema", None) is not None:
84+ # output_schema = JsonOutputParser(pydantic_object=self.node_config["schema"])
85+ # else:
86+ # output_schema = JsonOutputParser()
87+
88+ # format_instructions = output_schema.get_format_instructions()
89+
90+ template_merge = """
91+ You are a python expert in web scraping and you have just generated multiple scripts to scrape different URLs.\n
92+ The scripts are generated based on a user question and the content of the websites.\n
93+ You need to create one single script that merges the scripts generated for each URL.\n
94+ The scraped contents are in a JSON format and you need to merge them based on the context and providing a correct JSON structure.\n
95+ The output should be just in python code without any comment and should implement the main function.\n
96+ The python script, when executed, should format the extracted information sticking to the user question and scripts output format.\n
97+ USER PROMPT: {user_prompt}\n
98+ SCRIPTS:\n
99+ {scripts}
100+ """
101+
102+ prompt_template = PromptTemplate (
103+ template = template_merge ,
104+ input_variables = ["user_prompt" ],
105+ partial_variables = {
106+ "scripts" : scripts_str ,
107+ },
108+ )
109+
110+ merge_chain = prompt_template | self .llm_model | StrOutputParser ()
111+ answer = merge_chain .invoke ({"user_prompt" : user_prompt })
77112
78113 # Update the state with the generated answer
79- state .update ({self .output [0 ]: scripts })
114+ state .update ({self .output [0 ]: answer })
80115 return state
0 commit comments