Commit 1f76e35

minor convenient updates
1 parent 0fc1732 commit 1f76e35

2 files changed: +54 -10 lines changed

_sandbox

Lines changed: 35 additions & 0 deletions

@@ -0,0 +1,35 @@
+# import gradio as gr
+# import pandas as pd
+
+# df = pd.DataFrame({"A": [14, 4, 5, 4, 1], "B": [5, 2, 54, 3, 2], "C": [20, 20, 7, 3, 8]})
+
+
+# # Highlight entire rows based on conditions
+# def highlight_rows(row):
+#     if row["A"] > 4:
+#         return ["background-color: darkblue"] * len(row)
+#     else:
+#         return [""] * len(row)
+
+
+# styler = df.style.apply(highlight_rows, axis=1)
+
+# with gr.Blocks() as demo:
+#     gr.Dataframe(styler)
+# demo.launch()
+
+
+def format_function_call(func_name, kwargs=None):
+    """Format a function name and kwargs dict into a Python function call string."""
+    if kwargs is None:
+        kwargs = {}
+
+    if not kwargs:
+        return f"{func_name}()"
+
+    args_str = ", ".join(f"{key}={repr(value)}" for key, value in kwargs.items())
+    return f"{func_name}({args_str})"
+
+
+print(format_function_call("my_function", {"arg1": 42, "arg2": """Marc's 17" laptop"""}))
+print(format_function_call("my_function", {"arg1": 42, "arg2": "17' pole"}))
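For reference, the two print calls at the end exercise repr()'s quote handling around the f-string arguments; assuming standard CPython repr behavior, they should print:

my_function(arg1=42, arg2='Marc\'s 17" laptop')
my_function(arg1=42, arg2="17' pole")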

main_workarena_debug.py

Lines changed: 19 additions & 10 deletions

@@ -7,36 +7,45 @@
 """

 import logging
+from copy import deepcopy

 import bgym

 from agentlab.agents.tool_use_agent.tool_use_agent import (
     DEFAULT_PROMPT_CONFIG,
-    GPT_4_1_MINI,
-    OPENAI_MODEL_CONFIG,
+    GPT_4_1,
     ToolUseAgentArgs,
 )
 from agentlab.experiments.study import Study

 logging.getLogger().setLevel(logging.INFO)

-agent_config = ToolUseAgentArgs(
-    model_args=GPT_4_1_MINI,
-    config=DEFAULT_PROMPT_CONFIG,
-)
+config = deepcopy(DEFAULT_PROMPT_CONFIG)
+# config.keep_last_n_obs = 1
+config.obs.use_som = True


-agent_config.config.action_subsets = ("workarena",) # use the workarena action set
+agent_configs = [
+    ToolUseAgentArgs(
+        model_args=GPT_4_1,
+        config=config,
+    ),
+    # ToolUseAgentArgs(
+    #     model_args=GPT_4_1,
+    #     config=config,
+    # ),
+]

-agent_args = [agent_config]
+for agent_config in agent_configs:
+    agent_config.config.action_subsets = ("workarena",) # use the workarena action set


 # ## select the benchmark to run on
 # benchmark = "miniwob_tiny_test"
 benchmark = "workarena_l1"


-benchmark = bgym.DEFAULT_BENCHMARKS[benchmark]() # type: bgym.Benchmark
+benchmark = bgym.DEFAULT_BENCHMARKS[benchmark](n_repeats=4) # type: bgym.Benchmark
 benchmark = benchmark.subset_from_glob("task_name", "*create*")

 # for env_args in benchmark.env_args_list:

@@ -58,7 +67,7 @@
     study.find_incomplete(include_errors=True)

 else:
-    study = Study(agent_args, benchmark, logging_level_stdout=logging.WARNING)
+    study = Study(agent_configs, benchmark, logging_level_stdout=logging.WARNING)

 study.run(
     n_jobs=n_jobs,
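Why the deepcopy: mutating DEFAULT_PROMPT_CONFIG in place would also change it for any other consumer of the shared default, while mutating a deepcopy keeps the default intact. A minimal, self-contained Python sketch of the pattern (the Obs/PromptConfig dataclasses below are illustrative stand-ins, not agentlab types):

from copy import deepcopy
from dataclasses import dataclass, field

@dataclass
class Obs:
    use_som: bool = False  # hypothetical flag, mirrors config.obs.use_som above

@dataclass
class PromptConfig:
    obs: Obs = field(default_factory=Obs)

DEFAULT_PROMPT_CONFIG = PromptConfig()  # shared default

config = deepcopy(DEFAULT_PROMPT_CONFIG)  # independent copy, safe to mutate
config.obs.use_som = True

assert DEFAULT_PROMPT_CONFIG.obs.use_som is False  # shared default stays untouched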
