ServiceNow
diff --git a/src/agentlab/agents/README.md b/src/agentlab/agents/README.md
Lines changed: 2 additions & 2 deletions
diff --git a/src/agentlab/agents/generic_agent/reproducibility_agent.py b/src/agentlab/agents/generic_agent/reproducibility_agent.py
Lines changed: 5 additions & 8 deletions
diff --git a/src/agentlab/analyze/agent_xray.py b/src/agentlab/analyze/agent_xray.py
Lines changed: 32 additions & 14 deletions
diff --git a/src/agentlab/analyze/inspect_results.py b/src/agentlab/analyze/inspect_results.py
Lines changed: 7 additions & 6 deletions
diff --git a/src/agentlab/experiments/exp_utils.py b/src/agentlab/experiments/exp_utils.py
Lines changed: 4 additions & 4 deletions
diff --git a/src/agentlab/experiments/launch_exp.py b/src/agentlab/experiments/launch_exp.py
Lines changed: 6 additions & 4 deletions
@@ -99,7 +99,7 @@ have to specify the type of each field (You can use Any if it is unknown)*
 ```python
 from dataclasses import dataclass
 from browsergym.experiment.agent import Agent
-from browsergym.experiment.loop import AgentArgs
+from agentlab.experiments.loop import AgentArgs
 
 
 @dataclass
@@ -116,7 +116,7 @@ class CustomAgentArgs(AgentArgs):
 To run experiments with your custom agent, define an instance of `ExpArgs` with the required parameters.
 
 ```python
-from browsergym.experiment.loop import ExpArgs
+from agentlab.experiments.loop import ExpArgs
 
 exp_args = ExpArgs(
     agent_args=CustomAgentArgs(custom_param="value"),
 
@@ -20,13 +20,10 @@
 
 import bgym
 from browsergym.experiments.agent import AgentInfo
-from browsergym.experiments.loop import ExpArgs, ExpResult, yield_all_exp_results
 from bs4 import BeautifulSoup
-from langchain.schema import AIMessage, BaseMessage
-from langchain_community.adapters.openai import convert_message_to_dict
 
 from agentlab.agents.agent_args import AgentArgs
-from agentlab.agents.dynamic_prompting import ActionFlags
+from agentlab.experiments.loop import ExpArgs, ExpResult, yield_all_exp_results
 from agentlab.experiments.study import Study
 from agentlab.llm.chat_api import make_assistant_message
 from agentlab.llm.llm_utils import Discussion, messages_to_dict
@@ -65,7 +62,6 @@ def get_stats(self):
 
 @dataclass
 class ReproAgentArgs(GenericAgentArgs):
-
     # a name starting with "_" prevents the field from being part of the index in the load_results function
     _repro_dir: str = None
 
@@ -81,7 +77,6 @@ def make_agent(self):
 
 
 class ReproAgent(GenericAgent):
-
     def __init__(
         self,
         chat_model_args,
@@ -93,7 +88,6 @@ def __init__(
         super().__init__(chat_model_args, flags, max_retry)
 
     def get_action(self, obs):
-
         # replace the chat model with a reproducible chat that will mimic the
         # same answers
         step = len(self.actions)
@@ -218,7 +212,10 @@ def make_repro_agent(agent_args: AgentArgs, exp_dir: Path | str):
 
 def _make_diff(old_str, new_str):
     page = difflib.HtmlDiff().make_file(
-        old_str.splitlines(), new_str.splitlines(), fromdesc="Old Version", todesc="New Version"
+        old_str.splitlines(),
+        new_str.splitlines(),
+        fromdesc="Old Version",
+        todesc="New Version",
     )
     page = page.replace('nowrap="nowrap"', "")  # Remove nowrap attribute
     page = _set_style(page, DIFF_STYLE)
 
@@ -12,13 +12,13 @@
 import numpy as np
 import pandas as pd
 from attr import dataclass
-from browsergym.experiments.loop import ExpResult, StepInfo
 from langchain.schema import BaseMessage, HumanMessage
 from openai import OpenAI
 from PIL import Image
 
 from agentlab.analyze import inspect_results
 from agentlab.experiments.exp_utils import RESULTS_DIR
+from agentlab.experiments.loop import ExpResult, StepInfo
 from agentlab.experiments.study import get_most_recent_study
 from agentlab.llm.chat_api import make_system_message, make_user_message
 from agentlab.llm.llm_utils import BaseMessage as AgentLabBaseMessage
@@ -201,7 +201,6 @@ def run_gradio(results_dir: Path):
 """
             )
         with gr.Row():
-
             exp_dir_choice = gr.Dropdown(
                 choices=get_directory_contents(results_dir),
                 value=select_dir_instructions,
@@ -297,7 +296,10 @@ def run_gradio(results_dir: Path):
             state_error = gr.Markdown(label="Next Step Error", elem_classes="my-markdown")
 
         profiling_gr = gr.Image(
-            label="Profiling", show_label=False, interactive=False, show_download_button=False
+            label="Profiling",
+            show_label=False,
+            interactive=False,
+            show_download_button=False,
         )
 
         gr.HTML(
@@ -418,7 +420,14 @@ def run_gradio(results_dir: Path):
         exp_dir_choice.change(
             fn=new_exp_dir,
             inputs=exp_dir_choice,
-            outputs=[agent_table, agent_id, constants, variables, global_stats, error_report],
+            outputs=[
+                agent_table,
+                agent_id,
+                constants,
+                variables,
+                global_stats,
+                error_report,
+            ],
         )
 
         agent_table.select(fn=on_select_agent, inputs=agent_table, outputs=[agent_id])
@@ -454,7 +463,8 @@ def run_gradio(results_dir: Path):
         screenshot_gallery.select(fn=gallery_step_change, inputs=episode_id, outputs=step_id)
         step_id.change(fn=if_active("DOM HTML")(update_html), outputs=html_code)
         step_id.change(
-            fn=if_active("Pruned DOM HTML")(update_pruned_html), outputs=pruned_html_code
+            fn=if_active("Pruned DOM HTML")(update_pruned_html),
+            outputs=pruned_html_code,
         )
         step_id.change(fn=if_active("AXTree")(update_axtree), outputs=axtree_code)
         step_id.change(fn=if_active("Chat Messages")(update_chat_messages), outputs=chat_messages)
@@ -475,10 +485,14 @@ def run_gradio(results_dir: Path):
         # we need to update them individually when the tab is selected
         tab_screenshot.select(fn=update_screenshot, inputs=som_or_not, outputs=screenshot)
         tab_screenshot_pair.select(
-            fn=update_screenshot_pair, inputs=som_or_not, outputs=[screenshot1, screenshot2]
+            fn=update_screenshot_pair,
+            inputs=som_or_not,
+            outputs=[screenshot1, screenshot2],
         )
         tab_screenshot_gallery.select(
-            fn=update_screenshot_gallery, inputs=som_or_not, outputs=[screenshot_gallery]
+            fn=update_screenshot_gallery,
+            inputs=som_or_not,
+            outputs=[screenshot_gallery],
         )
         tab_html.select(fn=update_html, outputs=html_code)
         tab_pruned_html.select(fn=update_pruned_html, outputs=pruned_html_code)
@@ -617,7 +631,7 @@ def update_logs():
     try:
         return f"""{info.exp_result.logs}"""
     except FileNotFoundError:
-        return f"""No Logs"""
+        return """No Logs"""
 
 
 def update_stats():
@@ -757,11 +771,11 @@ def get_episode_info(info: Info):
 
         info = f"""\
 ### {env_args.task_name} (seed: {env_args.task_seed})
-### Step {info.step} / {len(steps_info)-1} (Reward: {cum_reward:.1f})
+### Step {info.step} / {len(steps_info) - 1} (Reward: {cum_reward:.1f})
 
 **Goal:**
 
-{code(str(AgentLabBaseMessage('', goal)))}
+{code(str(AgentLabBaseMessage("", goal)))}
 
 **Task info:**
 
@@ -770,7 +784,7 @@ def get_episode_info(info: Info):
 **exp_dir:**
 
 <small style="line-height: 1; margin: 0; padding: 0;">{code(exp_dir_str)}</small>"""
-    except Exception as e:
+    except Exception:
         info = f"""\
 **Error while getting episode info**
 {code(traceback.format_exc())}"""
@@ -942,7 +956,6 @@ def update_error_report():
 
 
 def new_exp_dir(exp_dir, progress=gr.Progress(), just_refresh=False):
-
     if exp_dir == select_dir_instructions:
         return None, None
 
@@ -1075,7 +1088,6 @@ def add_patch(ax, start, stop, color, label, edge=False):
 
 
 def plot_profiling(ax, step_info_list: list[StepInfo], summary_info: dict, progress_fn):
-
     if len(step_info_list) == 0:
         warning("No step info to plot")
         return None
@@ -1123,7 +1135,13 @@ def plot_profiling(ax, step_info_list: list[StepInfo], summary_info: dict, progr
 
         if step_info.action is not None:
             # Blue rectangle for agent_start to agent_stop
-            add_patch(ax, prof.agent_start, prof.agent_stop, colors[10], labels.pop("agent", None))
+            add_patch(
+                ax,
+                prof.agent_start,
+                prof.agent_stop,
+                colors[10],
+                labels.pop("agent", None),
+            )
 
             # Black vertical bar at agent stop
             ax.axvline(prof.agent_stop, color="black", linewidth=3)
 
@@ -10,10 +10,11 @@
 
 import numpy as np
 import pandas as pd
-from browsergym.experiments.loop import ExpResult, get_exp_result, yield_all_exp_results
 from IPython.display import display
 from tqdm import tqdm
 
+from agentlab.experiments.loop import ExpResult, get_exp_result, yield_all_exp_results
+
 # TODO find a more portable way to code set_task_category_as_index at least
 # handle dynamic imports. We don't want to always import workarena
 # from browsergym.workarena import TASK_CATEGORY_MAP
@@ -83,7 +84,7 @@ def set_index_from_variables(
         white = any([fnmatch.fnmatch(var, pattern) for pattern in index_white_list])
         black = any([fnmatch.fnmatch(var, pattern) for pattern in index_black_list])
 
-        if white and (not black) and (not var in index_variables):
+        if white and (not black) and (var not in index_variables):
             index_variables.append(var)
 
     for var in index_variables:
@@ -205,7 +206,7 @@ def report_constant_and_variables(df, show_stack_traces=True):
             if i >= 2:
                 break
         if len(unique_counts) > 3:
-            print(f"        ...\n")
+            print("        ...\n")
 
 
 def get_std_err(df, metric):
@@ -235,7 +236,7 @@ def get_sample_std_err(df, metric):
 
 
 def summarize(sub_df):
-    if not "cum_reward" in sub_df:
+    if "cum_reward" not in sub_df:
         record = dict(
             avg_reward=np.nan,
             std_err=np.nan,
@@ -745,7 +746,7 @@ def summarize_study(result_df: pd.DataFrame) -> pd.DataFrame:
 def split_by_key(df: pd.DataFrame, key):
     """Return a dict of dataframes separated by the given key."""
     # check if key in df
-    if not (key in df.columns):
+    if key not in df.columns:
         df = df.reset_index(key, inplace=False)
 
     df_dict = {}
@@ -775,7 +776,7 @@ def get_all_summaries(results_dir: Path, skip_hidden=True, ignore_cache=False, i
                 summary.set_index("study_dir", inplace=True)
                 summaries.append(summary)
 
-        except Exception as e:
+        except Exception:
             traceback.print_exc()
             continue
 
 
@@ -6,9 +6,10 @@
 from pathlib import Path
 from time import sleep, time
 
-from browsergym.experiments.loop import ExpArgs, yield_all_exp_results
 from tqdm import tqdm
 
+from agentlab.experiments.loop import ExpArgs, yield_all_exp_results
+
 logger = logging.getLogger(__name__)  # Get logger based on module name
 
 
@@ -63,7 +64,6 @@ def timeout_manager(seconds: int = None):
         return
 
     def alarm_handler(signum, frame):
-
         logger.warning(f"Operation timed out after {seconds}s, raising TimeoutError.")
         # send sigint
         # os.kill(os.getpid(), signal.SIGINT) # this doesn't seem to do much I don't know why
@@ -176,11 +176,11 @@ def hide_some_exp(base_dir, filter: callable, just_test):
 
     msg = f"Searching {len(exp_list)} experiments to move to _* expriments where `filter(exp_args)` is True."
     if just_test:
-        msg += f"\nNote: This is a just a test, no experiments will be moved. Set `just_test=False` to move them."
+        msg += "\nNote: This is a just a test, no experiments will be moved. Set `just_test=False` to move them."
 
     logging.info(msg)
 
-    exp_list = tqdm(exp_list, desc=f"Filtering experiments.")
+    exp_list = tqdm(exp_list, desc="Filtering experiments.")
 
     filtered_out = []
     for exp in exp_list:
 
@@ -3,9 +3,9 @@
 from pathlib import Path
 
 import bgym
-from browsergym.experiments.loop import ExpArgs, yield_all_exp_results
 
 from agentlab.experiments.exp_utils import run_exp
+from agentlab.experiments.loop import ExpArgs, yield_all_exp_results
 
 
 def run_experiments(
@@ -142,8 +142,8 @@ def find_incomplete(study_dir: str | Path, include_errors=True):
     else:
         logging.info(f"Found {job_count} incomplete experiments in {study_dir}.")
 
-    message = f"Make sure the processes that were running are all stopped. Otherwise, "
-    f"there will be concurrent writing in the same directories.\n"
+    message = "Make sure the processes that were running are all stopped. Otherwise, "
+    "there will be concurrent writing in the same directories.\n"
 
     logging.info(message)
 
@@ -193,7 +193,9 @@ def _hide_completed(exp_result: bgym.ExpResult, include_errors: bool = True):
 
 
 # TODO remove this function once ray backend is stable
-def _split_sequential_exp(exp_args_list: list[ExpArgs]) -> tuple[list[ExpArgs], list[ExpArgs]]:
+def _split_sequential_exp(
+    exp_args_list: list[ExpArgs],
+) -> tuple[list[ExpArgs], list[ExpArgs]]:
     """split exp_args that are flagged as sequential from those that are not"""
     sequential_exp_args = []
     parallel_exp_args = []