@@ -69,9 +69,7 @@ def set_index_from_variables(
6969 index_black_list: List of wildcard patterns to match variables that
7070 should be excluded from the index.
7171 task_key: The key to use as the first level of the index.
72- force_at_leaste_one_variable: If True, force at least one variable in the
73- index. If no variable is found, the index will be set to
74- task_key + "agent.agent_name".
72+ add_agent_and_benchmark: If True, add agent.agent_name and env.benchmark
7573 """
7674 df .reset_index (inplace = True )
7775 constants , variables , _ = get_constants_and_variables (df )
@@ -127,6 +125,7 @@ def load_result_df(
127125 should be included in the index.
128126 index_black_list: List of wildcard patterns to match variables that
129127 should be excluded from the index.
128+ remove_args_suffix: If True, remove the _args suffix from the columns
130129
131130 Returns:
132131 pd.DataFrame: The result dataframe
@@ -733,17 +732,13 @@ def _categorize_error(row):
733732
734733
735734def _benchmark_from_task_name (task_name : str ):
736- """Extract the benchmark from the task name.
737- TODO should be more robost, e.g. handle workarna.L1, workarena.L2, etc.
738- """
735+ """Extract the benchmark from the task name."""
736+ # TODO should be more robust, e.g. handle workarena.L1, workarena.L2, etc.
739737 return task_name .split ("." )[0 ]
740738
741739
742740def summarize_study (result_df : pd .DataFrame ) -> pd .DataFrame :
743- """Create a summary of the study.
744-
745- Similar to global report, but handles single agent differently.
746- """
741+ """Create a summary of the study. Similar to global report, but handles single agent differently."""
747742
748743 levels = list (range (result_df .index .nlevels ))
749744 return result_df .groupby (level = levels [1 :]).apply (summarize )
0 commit comments