Skip to content

Commit 8af0e2f

Browse files
committed
update
1 parent 68a3d1a commit 8af0e2f

File tree

3 files changed

+7
-3
lines changed

3 files changed

+7
-3
lines changed

evalscope/app/utils/visualization.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,18 @@ def plot_single_report_sunburst(report_list: List[Report]):
3838
path = [ReportKey.dataset_name] + categories + [ReportKey.subset_name]
3939
logger.debug(f'df: \n{df}')
4040
df[categories] = df[categories].fillna('default') # NOTE: fillna for empty categories
41+
df = df[df[ReportKey.num] > 0] # NOTE: filter out zero-num rows to avoid ZeroDivisionError in plotly
42+
if df.empty:
43+
return None
4144

4245
plot = px.sunburst(
4346
df,
4447
path=path,
4548
values=ReportKey.num,
4649
color=ReportKey.score,
4750
color_continuous_scale='RdYlGn', # see https://plotly.com/python/builtin-colorscales/
48-
color_continuous_midpoint=np.average(df[ReportKey.score], weights=df[ReportKey.num]),
51+
color_continuous_midpoint=np.average(df[ReportKey.score], weights=df[ReportKey.num])
52+
if df[ReportKey.num].sum() > 0 else df[ReportKey.score].mean(),
4953
template=PLOTLY_THEME,
5054
maxdepth=4
5155
)

tests/benchmark/test_eval.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,7 @@ def test_bfcl_v3(self):
387387
'underscore_to_dot': True
388388
}
389389
}
390-
self._run_dataset_test('bfcl_v3', dataset_args=dataset_args, model='qwen-plus', limit=10)
390+
self._run_dataset_test('bfcl_v3', dataset_args=dataset_args, model='qwen-plus', limit=1)
391391

392392
def test_bfcl_v4(self):
393393
"""Test BFCL v4 dataset."""

tests/cli/test_all.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151
'frames',
5252
'docmath',
5353
'needle_haystack',
54-
'bfcl_v3',
54+
# 'bfcl_v3',
5555
'hle',
5656
'tau_bench',
5757
]

0 commit comments

Comments
 (0)