some changes on the system prompt and agent class

Jeomon · Jeomon · commit e0ba9da4eb49 · 2025-07-07T15:20:57.000+05:30
diff --git a/app.py b/app.py
@@ -10,9 +10,9 @@
 browser_instance_dir = os.getenv('BROWSER_INSTANCE_DIR')
 user_data_dir = os.getenv('USER_DATA_DIR')
 
-llm=ChatGemini(model='gemini-2.0-flash',api_key=api_key,temperature=0)
+llm=ChatGemini(model='gemini-2.5-flash',api_key=api_key,temperature=0)
 config=BrowserConfig(browser='edge',browser_instance_dir=browser_instance_dir,user_data_dir=user_data_dir,headless=False)
 
-agent=WebAgent(config=config,llm=llm,verbose=True,use_vision=False,max_iteration=100)
+agent=WebAgent(config=config,llm=llm,verbose=True,use_vision=True,max_iteration=100)
 user_query = input('Enter your query: ')
 agent.print_response(user_query)
diff --git a/src/agent/web/__init__.py b/src/agent/web/__init__.py
@@ -91,7 +91,6 @@ async def reason(self,state:AgentState):
         })
         messages=[SystemMessage(system_prompt)]+state.get('messages')
         ai_message=await self.llm.async_invoke(messages=messages)
-        # print(ai_message.content)
         agent_data=extract_agent_data(ai_message.content)
         memory=agent_data.get('Memory')
         evaluate=agent_data.get("Evaluate")
@@ -128,14 +127,14 @@ async def action(self,state:AgentState):
         action_result=await self.registry.async_execute(action_name,action_input,context=self.context)
         observation=action_result.content
         if self.verbose:
-            print(colored(f'Observation: {textwrap.shorten(observation,width=500)}',color='green',attrs=['bold']))
+            print(colored(f'Observation: {textwrap.shorten(observation,width=1000,placeholder='...')}',color='green',attrs=['bold']))
         if self.verbose and self.token_usage:
             print(f'Input Tokens: {self.llm.tokens.input} Output Tokens: {self.llm.tokens.output} Total Tokens: {self.llm.tokens.total}')
-        # Get the current browser state
+        # Get the current screenshot,browser state and dom state
         browser_state=await self.context.get_state(use_vision=self.use_vision)
+        current_tab=browser_state.current_tab
         dom_state=browser_state.dom_state
         image_obj=browser_state.screenshot
-        current_tab=browser_state.current_tab
         # Redefining the AIMessage and adding the new observation
         action_prompt=self.action_prompt.format(**{
             'memory':memory,
@@ -147,10 +146,6 @@ async def action(self,state:AgentState):
         observation_prompt=self.observation_prompt.format(**{
             'iteration':self.iteration,
             'max_iteration':self.max_iteration,
-            'memory':memory,
-            'evaluate':evaluate,
-            'thought':thought,
-            'action':f'{action_name}({','.join([f'{k}={v}' for k,v in action_input.items()])})',
             'observation':observation,
             'current_tab':current_tab.to_string(),
             'tabs':browser_state.tabs_to_string(),
@@ -189,7 +184,8 @@ async def answer(self,state:AgentState):
         message=AIMessage(answer_prompt)
         if self.verbose:
             print(colored(f'Final Answer: {final_answer}',color='cyan',attrs=['bold']))
-        return {**state,'output':final_answer,'messages':[message]}
+        await self.close()
+        return {**state,'browser_state':None,'dom_state':None,'output':final_answer,'messages':[message],'prev_observation':'','agent_data':{}}
 
     def main_controller(self,state:AgentState):
         "Route to the next node"
@@ -243,7 +239,6 @@ async def async_invoke(self, input: str)->dict|BaseModel:
         }
         self.start_time=datetime.now()
         response=await self.graph.ainvoke(state,config={'recursion_limit':self.max_iteration})
-        await self.close()
         self.end_time=datetime.now()
         total_seconds=(self.end_time-self.start_time).total_seconds()
         if self.verbose and self.token_usage:
diff --git a/src/agent/web/prompt/system.md b/src/agent/web/prompt/system.md
@@ -60,7 +60,7 @@ At every step, Web Agent will be given the state:
 
 Web Agent must follow the following rules while browsing the web:
 
-1. ALWAYS start solving the given query using the appropirate search domains like google, youtube, wikipaedia, ...etc.
+1. ALWAYS start solving the given query using the appropriate search domains like google, youtube, wikipedia, twitter, etc.
 2. When performing deep research make sure conduct it in a seperate tab using `Tab Tool` and not on the current working tab.
 3. If any banners or ads those are obstructing the way close it and accept cookies if you see in the page.
 4. If a captcha appears, attempt solving it if possible or else use fallback strategies (ex: go back, alternative site).