Add docstrings for agent factory functions and apply black formatting

amanjaiswal73892 · amanjaiswal73892 · commit 958430ccc6e1 · 2025-09-02T16:11:32.000-04:00
diff --git a/src/agentlab/agents/agent_utils.py b/src/agentlab/agents/agent_utils.py
@@ -1,8 +1,10 @@
 import copy
+
 from PIL import Image, ImageDraw
 from playwright.sync_api import Page
-from agentlab.llm.llm_utils import img_to_base_64
+
 from agentlab.analyze import overlay_utils
+from agentlab.llm.llm_utils import img_to_base_64
 
 
 def draw_mouse_pointer(image: Image.Image, x: int, y: int) -> Image.Image:
@@ -139,6 +141,3 @@ def overlay_action(obs, action):
     act_img = Image.fromarray(act_img)
     overlay_utils.annotate_action(act_img, action, properties=obs["extra_element_properties"])
     return img_to_base_64(act_img)
-
-
-
diff --git a/src/agentlab/agents/hitl_agent/generic_human_guided_agent.py b/src/agentlab/agents/hitl_agent/generic_human_guided_agent.py
@@ -11,6 +11,7 @@
 from PIL import Image
 
 from agentlab.agents import dynamic_prompting as dp
+from agentlab.agents.agent_utils import overlay_action
 from agentlab.agents.generic_agent.generic_agent import GenericAgent, GenericAgentArgs
 from agentlab.agents.generic_agent.generic_agent_prompt import MainPrompt
 from agentlab.agents.hitl_agent.hint_labelling import (
@@ -22,10 +23,10 @@
     Discussion,
     HumanMessage,
     SystemMessage,
+    img_to_base_64,
 )
 from agentlab.llm.tracking import cost_tracker_decorator
-from agentlab.agents.agent_utils import overlay_action
-from agentlab.llm.llm_utils import img_to_base_64
+
 
 class CandidatesGeneration(dp.PromptElement):
     # Ask for multiple alternatives; each candidate must contain <think> and <action>.
@@ -323,6 +324,16 @@ def get_action(self, obs):
 
 
 def get_base_agent(llm_config):
+    """Creates and returns a MultipleProposalGenericAgentArgs instance with
+    the specified LLM configuration from CHAT_MODEL_ARGS_DICT.
+
+    Args:
+        llm_config: The LLM configuration key to use from CHAT_MODEL_ARGS_DICT.
+
+    Returns:
+        MultipleProposalGenericAgentArgs: Configured agent arguments instance.
+    """
+
     from agentlab.agents.generic_agent.tmlr_config import BASE_FLAGS
     from agentlab.llm.llm_configs import CHAT_MODEL_ARGS_DICT
 
diff --git a/src/agentlab/agents/hitl_agent/hitl_agent.py b/src/agentlab/agents/hitl_agent/hitl_agent.py
@@ -90,6 +90,7 @@ def get_action(self, obs):
                     candidates = self.subagent.get_candidate_generations(
                         obs, hint=step_hint if step_hint else None, n_candidates=3
                     )
+                    step_n_human_intervention_rounds += 1
                     suggestions = [
                         {"action": c["action"], "think": c["agent_info"].think} for c in candidates
                     ]
@@ -156,6 +157,19 @@ def set_reproducibility_mode(self):
 
 
 def get_base_human_in_the_loop_genericagent(llm_config):
+    """
+    Create a base human-in-the-loop generic agent configuration from a CHAT_MODEL_ARGS_DICT key.
+
+    This function creates a HumanInTheLoopAgentArgs instance with a MultiCandidateGenericAgent
+    as the subagent, configured with the specified LLM configuration and base flags.
+
+    Args:
+        llm_config (str): The LLM configuration key to use from CHAT_MODEL_ARGS_DICT.
+
+    Returns:
+        HumanInTheLoopAgentArgs: Configured human-in-the-loop agent arguments with
+                                a multi-candidate generic agent as the subagent.
+    """
     from agentlab.agents.generic_agent.tmlr_config import BASE_FLAGS
     from agentlab.agents.hitl_agent.hitl_agent import HumanInTheLoopAgentArgs
     from agentlab.agents.hitl_agent.multi_candidate_generic_agent import (
diff --git a/tutorials/2_eval_on_miniwob/inspect_results.ipynb b/tutorials/2_eval_on_miniwob/inspect_results.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "id": "58086537",
    "metadata": {},
    "outputs": [],
@@ -25,10 +25,47 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
+   "id": "7901cccc",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "PosixPath('/Users/aman.jaiswal/Work/AgentLab.worktrees/trace-recorder/results')"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "RESULTS_DIR"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
    "id": "50be19a9",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/Users/aman.jaiswal/Work/AgentLab.worktrees/trace-recorder/results/2025-09-02_15-52-00_hitl-genericagent-gpt-5-mini-2025-08-07-on-workarena-l1-task-name-create\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Searching experiments directories.: 100%|██████████| 1/1 [00:00<00:00, 5433.04it/s]\n",
+      "Loading results: 100%|██████████| 1/1 [00:00<00:00, 373.26it/s]\n"
+     ]
+    }
+   ],
    "source": [
     "# replace this by your desired directory if needed.\n",
     "result_dir = get_most_recent_study(RESULTS_DIR, contains=None)\n",
@@ -39,15 +76,222 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
+   "id": "82cc1557",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "PosixPath('/Users/aman.jaiswal/Work/AgentLab.worktrees/trace-recorder/results/2025-09-02_15-52-00_hitl-genericagent-gpt-5-mini-2025-08-07-on-workarena-l1-task-name-create')"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "result_dir"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
    "id": "a424c470",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Found multiple configuration, averaging across tasks and returning a per-agent report.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<style type=\"text/css\">\n",
+       "#T_1d2fe th {\n",
+       "  white-space: pre-wrap;\n",
+       "}\n",
+       "</style>\n",
+       "<table id=\"T_1d2fe\">\n",
+       "  <thead>\n",
+       "    <tr>\n",
+       "      <th class=\"blank level0\" >&nbsp;</th>\n",
+       "      <th id=\"T_1d2fe_level0_col0\" class=\"col_heading level0 col0\" >agent.agent\n",
+       "name</th>\n",
+       "      <th id=\"T_1d2fe_level0_col1\" class=\"col_heading level0 col1\" >env.benchmark</th>\n",
+       "      <th id=\"T_1d2fe_level0_col2\" class=\"col_heading level0 col2\" >avg\n",
+       "reward</th>\n",
+       "      <th id=\"T_1d2fe_level0_col3\" class=\"col_heading level0 col3\" >std\n",
+       "err</th>\n",
+       "      <th id=\"T_1d2fe_level0_col4\" class=\"col_heading level0 col4\" >avg\n",
+       "steps</th>\n",
+       "      <th id=\"T_1d2fe_level0_col5\" class=\"col_heading level0 col5\" >n\n",
+       "completed</th>\n",
+       "      <th id=\"T_1d2fe_level0_col6\" class=\"col_heading level0 col6\" >n\n",
+       "err</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th id=\"T_1d2fe_level0_row0\" class=\"row_heading level0 row0\" >0</th>\n",
+       "      <td id=\"T_1d2fe_row0_col0\" class=\"data row0 col0\" >HITL-GenericAgent-gpt-5-mini-2025-08-07</td>\n",
+       "      <td id=\"T_1d2fe_row0_col1\" class=\"data row0 col1\" >workarena</td>\n",
+       "      <td id=\"T_1d2fe_row0_col2\" class=\"data row0 col2\" >nan</td>\n",
+       "      <td id=\"T_1d2fe_row0_col3\" class=\"data row0 col3\" >nan</td>\n",
+       "      <td id=\"T_1d2fe_row0_col4\" class=\"data row0 col4\" >nan</td>\n",
+       "      <td id=\"T_1d2fe_row0_col5\" class=\"data row0 col5\" >0/1</td>\n",
+       "      <td id=\"T_1d2fe_row0_col6\" class=\"data row0 col6\" >0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n"
+      ],
+      "text/plain": [
+       "<pandas.io.formats.style.Styler at 0x125c55850>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "report = inspect_results.global_report(result_df)\n",
     "inspect_results.display_report(report)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f86e44fd",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.microsoft.datawrangler.viewer.v0+json": {
+       "columns": [
+        {
+         "name": "('agent.agent_name', 'env.benchmark')",
+         "rawType": "object",
+         "type": "unknown"
+        },
+        {
+         "name": "avg_reward",
+         "rawType": "float64",
+         "type": "float"
+        },
+        {
+         "name": "std_err",
+         "rawType": "float64",
+         "type": "float"
+        },
+        {
+         "name": "avg_steps",
+         "rawType": "float64",
+         "type": "float"
+        },
+        {
+         "name": "n_completed",
+         "rawType": "object",
+         "type": "string"
+        },
+        {
+         "name": "n_err",
+         "rawType": "int64",
+         "type": "integer"
+        }
+       ],
+       "ref": "ea68795e-a1d8-404e-9e36-1061d8fa9e87",
+       "rows": [
+        [
+         "('HITL-GenericAgent-gpt-5-mini-2025-08-07', 'workarena')",
+         null,
+         null,
+         null,
+         "0/1",
+         "0"
+        ]
+       ],
+       "shape": {
+        "columns": 5,
+        "rows": 1
+       }
+      },
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th>avg_reward</th>\n",
+       "      <th>std_err</th>\n",
+       "      <th>avg_steps</th>\n",
+       "      <th>n_completed</th>\n",
+       "      <th>n_err</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>agent.agent_name</th>\n",
+       "      <th>env.benchmark</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>HITL-GenericAgent-gpt-5-mini-2025-08-07</th>\n",
+       "      <th>workarena</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0/1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                                       avg_reward  std_err  \\\n",
+       "agent.agent_name                        env.benchmark                        \n",
+       "HITL-GenericAgent-gpt-5-mini-2025-08-07 workarena             NaN      NaN   \n",
+       "\n",
+       "                                                       avg_steps n_completed  \\\n",
+       "agent.agent_name                        env.benchmark                          \n",
+       "HITL-GenericAgent-gpt-5-mini-2025-08-07 workarena            NaN         0/1   \n",
+       "\n",
+       "                                                       n_err  \n",
+       "agent.agent_name                        env.benchmark         \n",
+       "HITL-GenericAgent-gpt-5-mini-2025-08-07 workarena          0  "
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "\n"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "385559d7",
@@ -149,7 +393,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "AgentLab",
+   "display_name": "agentlab",
    "language": "python",
    "name": "python3"
   },
@@ -163,7 +407,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.7"
+   "version": "3.12.9"
   }
  },
  "nbformat": 4,