revert change to ipynb

amanjaiswal73892 · amanjaiswal73892 · commit 6b78e8e4816c · 2025-09-02T19:20:36.000-04:00
diff --git a/tutorials/2_eval_on_miniwob/inspect_results.ipynb b/tutorials/2_eval_on_miniwob/inspect_results.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "id": "58086537",
    "metadata": {},
    "outputs": [],
@@ -25,47 +25,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
-   "id": "7901cccc",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "PosixPath('/Users/aman.jaiswal/Work/AgentLab.worktrees/trace-recorder/results')"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "RESULTS_DIR"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "id": "50be19a9",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "/Users/aman.jaiswal/Work/AgentLab.worktrees/trace-recorder/results/2025-09-02_15-52-00_hitl-genericagent-gpt-5-mini-2025-08-07-on-workarena-l1-task-name-create\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Searching experiments directories.: 100%|██████████| 1/1 [00:00<00:00, 5433.04it/s]\n",
-      "Loading results: 100%|██████████| 1/1 [00:00<00:00, 373.26it/s]\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# replace this by your desired directory if needed.\n",
     "result_dir = get_most_recent_study(RESULTS_DIR, contains=None)\n",
@@ -76,222 +39,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
-   "id": "82cc1557",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "PosixPath('/Users/aman.jaiswal/Work/AgentLab.worktrees/trace-recorder/results/2025-09-02_15-52-00_hitl-genericagent-gpt-5-mini-2025-08-07-on-workarena-l1-task-name-create')"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "result_dir"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "id": "a424c470",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Found multiple configuration, averaging across tasks and returning a per-agent report.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<style type=\"text/css\">\n",
-       "#T_1d2fe th {\n",
-       "  white-space: pre-wrap;\n",
-       "}\n",
-       "</style>\n",
-       "<table id=\"T_1d2fe\">\n",
-       "  <thead>\n",
-       "    <tr>\n",
-       "      <th class=\"blank level0\" >&nbsp;</th>\n",
-       "      <th id=\"T_1d2fe_level0_col0\" class=\"col_heading level0 col0\" >agent.agent\n",
-       "name</th>\n",
-       "      <th id=\"T_1d2fe_level0_col1\" class=\"col_heading level0 col1\" >env.benchmark</th>\n",
-       "      <th id=\"T_1d2fe_level0_col2\" class=\"col_heading level0 col2\" >avg\n",
-       "reward</th>\n",
-       "      <th id=\"T_1d2fe_level0_col3\" class=\"col_heading level0 col3\" >std\n",
-       "err</th>\n",
-       "      <th id=\"T_1d2fe_level0_col4\" class=\"col_heading level0 col4\" >avg\n",
-       "steps</th>\n",
-       "      <th id=\"T_1d2fe_level0_col5\" class=\"col_heading level0 col5\" >n\n",
-       "completed</th>\n",
-       "      <th id=\"T_1d2fe_level0_col6\" class=\"col_heading level0 col6\" >n\n",
-       "err</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th id=\"T_1d2fe_level0_row0\" class=\"row_heading level0 row0\" >0</th>\n",
-       "      <td id=\"T_1d2fe_row0_col0\" class=\"data row0 col0\" >HITL-GenericAgent-gpt-5-mini-2025-08-07</td>\n",
-       "      <td id=\"T_1d2fe_row0_col1\" class=\"data row0 col1\" >workarena</td>\n",
-       "      <td id=\"T_1d2fe_row0_col2\" class=\"data row0 col2\" >nan</td>\n",
-       "      <td id=\"T_1d2fe_row0_col3\" class=\"data row0 col3\" >nan</td>\n",
-       "      <td id=\"T_1d2fe_row0_col4\" class=\"data row0 col4\" >nan</td>\n",
-       "      <td id=\"T_1d2fe_row0_col5\" class=\"data row0 col5\" >0/1</td>\n",
-       "      <td id=\"T_1d2fe_row0_col6\" class=\"data row0 col6\" >0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n"
-      ],
-      "text/plain": [
-       "<pandas.io.formats.style.Styler at 0x125c55850>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "report = inspect_results.global_report(result_df)\n",
     "inspect_results.display_report(report)"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "f86e44fd",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.microsoft.datawrangler.viewer.v0+json": {
-       "columns": [
-        {
-         "name": "('agent.agent_name', 'env.benchmark')",
-         "rawType": "object",
-         "type": "unknown"
-        },
-        {
-         "name": "avg_reward",
-         "rawType": "float64",
-         "type": "float"
-        },
-        {
-         "name": "std_err",
-         "rawType": "float64",
-         "type": "float"
-        },
-        {
-         "name": "avg_steps",
-         "rawType": "float64",
-         "type": "float"
-        },
-        {
-         "name": "n_completed",
-         "rawType": "object",
-         "type": "string"
-        },
-        {
-         "name": "n_err",
-         "rawType": "int64",
-         "type": "integer"
-        }
-       ],
-       "ref": "ea68795e-a1d8-404e-9e36-1061d8fa9e87",
-       "rows": [
-        [
-         "('HITL-GenericAgent-gpt-5-mini-2025-08-07', 'workarena')",
-         null,
-         null,
-         null,
-         "0/1",
-         "0"
-        ]
-       ],
-       "shape": {
-        "columns": 5,
-        "rows": 1
-       }
-      },
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th>avg_reward</th>\n",
-       "      <th>std_err</th>\n",
-       "      <th>avg_steps</th>\n",
-       "      <th>n_completed</th>\n",
-       "      <th>n_err</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>agent.agent_name</th>\n",
-       "      <th>env.benchmark</th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>HITL-GenericAgent-gpt-5-mini-2025-08-07</th>\n",
-       "      <th>workarena</th>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>0/1</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                                       avg_reward  std_err  \\\n",
-       "agent.agent_name                        env.benchmark                        \n",
-       "HITL-GenericAgent-gpt-5-mini-2025-08-07 workarena             NaN      NaN   \n",
-       "\n",
-       "                                                       avg_steps n_completed  \\\n",
-       "agent.agent_name                        env.benchmark                          \n",
-       "HITL-GenericAgent-gpt-5-mini-2025-08-07 workarena            NaN         0/1   \n",
-       "\n",
-       "                                                       n_err  \n",
-       "agent.agent_name                        env.benchmark         \n",
-       "HITL-GenericAgent-gpt-5-mini-2025-08-07 workarena          0  "
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "\n"
-   ]
-  },
   {
    "cell_type": "markdown",
    "id": "385559d7",
@@ -393,7 +149,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "agentlab",
+   "display_name": "AgentLab",
    "language": "python",
    "name": "python3"
   },
@@ -407,7 +163,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.9"
+   "version": "3.12.7"
   }
  },
  "nbformat": 4,