Skip to content

Commit 906e830

Browse files
committed
removing old code
1 parent d710665 commit 906e830

File tree

13 files changed

+13
-1718
lines changed

13 files changed

+13
-1718
lines changed

ICML2024/script.ipynb

Lines changed: 0 additions & 732 deletions
This file was deleted.

src/agentlab/analyze/error_categorization.py

Lines changed: 0 additions & 89 deletions
This file was deleted.

src/agentlab/analyze/inspect_results.py

Lines changed: 2 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,10 @@
11
import fnmatch
2-
import io
32
import json
43
import random
54
import re
65
import traceback
76
import warnings
87
from collections import defaultdict
9-
from datetime import datetime
108
from logging import warn
119
from pathlib import Path
1210

@@ -16,25 +14,14 @@
1614
from IPython.display import display
1715
from tqdm import tqdm
1816

19-
from agentlab.analyze.error_categorization import (
20-
ERR_CLASS_MAP,
21-
is_critical_server_error,
22-
is_minor_server_error,
23-
)
2417
from agentlab.experiments.exp_utils import RESULTS_DIR
25-
from agentlab.utils.bootstrap import bootstrap_matrix, convert_df_to_array
2618

2719
# TODO find a more portable way to code set_task_category_as_index at least
2820
# handle dynamic imports. We don't want to always import workarena
2921
# from browsergym.workarena import TASK_CATEGORY_MAP
3022

3123
warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning)
3224

33-
try:
34-
import pyperclip
35-
except ImportError:
36-
pyperclip = None
37-
3825
pd.set_option("display.multi_sparse", False)
3926

4027
AGENT_NAME_KEY = "agent.agent_name"
@@ -224,17 +211,6 @@ def report_constant_and_variables(df, show_stack_traces=True):
224211
print(f" ...\n")
225212

226213

227-
def get_bootstrap(df, metric, reduce_fn=np.nanmean, n_bootstrap=100, group_by=TASK_KEY, prior=0.5):
228-
"""Get the stratified bootstrap mean and std for the given metric."""
229-
grouped_df = df.reset_index(inplace=False).groupby(group_by)
230-
array = convert_df_to_array(grouped_df, metric=metric, threshold=0.7)
231-
if prior is not None:
232-
prior = prior * np.ones((len(array), 1))
233-
array = np.concatenate([array, prior], axis=1)
234-
235-
bootstrapped_values = bootstrap_matrix(array, n_bootstrap=n_bootstrap, reduce_fn=reduce_fn)
236-
return np.nanmean(bootstrapped_values), np.nanstd(bootstrapped_values)
237-
238214

239215
def get_std_err(df, metric):
240216
"""Get the standard error for a binary metric."""
@@ -262,7 +238,7 @@ def get_sample_std_err(df, metric):
262238
return mean, std_err
263239

264240

265-
def summarize(sub_df, use_bootstrap=False):
241+
def summarize(sub_df):
266242
if not "cum_reward" in sub_df:
267243
record = dict(
268244
avg_reward=np.nan,
@@ -279,10 +255,7 @@ def summarize(sub_df, use_bootstrap=False):
279255
if n_completed == 0:
280256
return None
281257

282-
if use_bootstrap:
283-
_mean_reward, std_reward = get_bootstrap(sub_df, "cum_reward")
284-
else:
285-
_mean_reward, std_reward = get_std_err(sub_df, "cum_reward")
258+
_mean_reward, std_reward = get_std_err(sub_df, "cum_reward")
286259

287260
# sanity check, if there is an error the reward should be zero
288261
assert sub_df[sub_df["err_msg"].notnull()]["cum_reward"].sum() == 0
@@ -466,20 +439,6 @@ def _rename_bool_flags(report: pd.DataFrame, true_str="✓", false_str="-"):
466439
return report
467440

468441

469-
def to_clipboard(df: pd.DataFrame):
470-
"""Copy the dataframe to the clipboard as a tab separated csv."""
471-
output = io.StringIO()
472-
df.to_csv(output, sep="\t", index=True)
473-
csv_string = output.getvalue()
474-
if pyperclip is not None:
475-
try:
476-
pyperclip.copy(csv_string)
477-
except Exception as e:
478-
warn(f"Failed to copy to clipboard: {e}")
479-
# else:
480-
# print("pyperclip is not installed, cannot copy to clipboard.")
481-
# return df
482-
483442

484443
def flag_report(report: pd.DataFrame, metric: str = "avg_reward", round_digits: int = 2):
485444
# for all index in the multi-index with boolean value, get the average for

0 commit comments

Comments
 (0)