Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 5 additions & 10 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,15 @@ goex/docker/misc/images.json

################## Berkeley Function Call Leaderboard ##########################

# Ignore tree-sitter
berkeley-function-call-leaderboard/eval_checker/tree-sitter-java
berkeley-function-call-leaderboard/eval_checker/tree-sitter-javascript
berkeley-function-call-leaderboard/tree-sitter-java
berkeley-function-call-leaderboard/tree-sitter-javascript

# Ignore aggregated eval data (used for OSS models)
berkeley-function-call-leaderboard/eval_data_total.json

# Ignore inference results
berkeley-function-call-leaderboard/result/

# Ignore leaderboard score
berkeley-function-call-leaderboard/score/

# Ignore environment variables
berkeley-function-call-leaderboard/.env
!berkeley-function-call-leaderboard/.env.example

.direnv/
.venv
.venv
4 changes: 4 additions & 0 deletions berkeley-function-call-leaderboard/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ ANTHROPIC_API_KEY=
NVIDIA_API_KEY=nvapi-XXXXXX
YI_API_KEY=

# We use Vertex AI to run inference on Google Gemini models
VERTEX_AI_PROJECT_ID=
VERTEX_AI_LOCATION=

COHERE_API_KEY=
USE_COHERE_OPTIMIZATION=False # True/False

Expand Down
124 changes: 124 additions & 0 deletions berkeley-function-call-leaderboard/CHANGELOG.md

Large diffs are not rendered by default.

305 changes: 112 additions & 193 deletions berkeley-function-call-leaderboard/README.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import json
import argparse
import os
from bfcl.eval_checker.custom_exception import NoAPIKeyError
from bfcl.eval_checker.executable_eval.custom_exception import NoAPIKeyError
from dotenv import load_dotenv

parser = argparse.ArgumentParser(description="Replace placeholders in the function credential config file.")
Expand Down
166 changes: 166 additions & 0 deletions berkeley-function-call-leaderboard/bfcl/constant.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
# Prefix shared by every test-data file name built below.
VERSION_PREFIX = "BFCL_v3"

# Maps each test category name to its data file name. Every file name has the
# form "<VERSION_PREFIX>_<category>.json"; the tuple order fixes the key order.
TEST_FILE_MAPPING = {
    category: f"{VERSION_PREFIX}_{category}.json"
    for category in (
        "exec_simple",
        "exec_parallel",
        "exec_multiple",
        "exec_parallel_multiple",
        "simple",
        "irrelevance",
        "parallel",
        "multiple",
        "parallel_multiple",
        "java",
        "javascript",
        "rest",
        "sql",
        "chatable",
        # Live Datasets
        "live_simple",
        "live_multiple",
        "live_parallel",
        "live_parallel_multiple",
        "live_irrelevance",
        "live_relevance",
        # Multi-turn Datasets
        "multi_turn_base",
        "multi_turn_miss_func",
        "multi_turn_miss_param",
        "multi_turn_long_context",
        "multi_turn_composite",
    )
}

def _build_test_collection_mapping():
    """Assemble the collection-name -> list-of-test-categories mapping.

    The collections are compositions of a handful of recurring category
    groups, so each group is spelled out once here and the collections are
    built by concatenating them. Every value is a freshly built list, so no
    two collections share a list object.
    """
    exec_tests = (
        "exec_simple",
        "exec_parallel",
        "exec_multiple",
        "exec_parallel_multiple",
    )
    core_tests = (
        "simple",
        "irrelevance",
        "parallel",
        "multiple",
        "parallel_multiple",
    )
    non_python_tests = (
        "java",
        "javascript",
    )
    live_tests = (
        "live_simple",
        "live_multiple",
        "live_parallel",
        "live_parallel_multiple",
        "live_irrelevance",
        "live_relevance",
    )
    multi_turn_tests = (
        "multi_turn_base",
        "multi_turn_miss_func",
        "multi_turn_miss_param",
        "multi_turn_long_context",
        "multi_turn_composite",
    )

    return {
        "all": [
            *exec_tests,
            *core_tests,
            *non_python_tests,
            "rest",
            *live_tests,
            *multi_turn_tests,
        ],
        "multi_turn": [*multi_turn_tests],
        "single_turn": [
            *exec_tests,
            *core_tests,
            *non_python_tests,
            "rest",
            *live_tests,
        ],
        "live": [*live_tests],
        "non_live": [
            *exec_tests,
            *core_tests,
            *non_python_tests,
            "rest",
        ],
        # TODO: Update this mapping
        "ast": [
            *core_tests,
            *non_python_tests,
            *live_tests,
        ],
        "executable": [*exec_tests, "rest"],
        "non_python": [*non_python_tests],
        "python": [
            *exec_tests,
            *core_tests,
            "rest",
            *live_tests,
        ],
        "python_ast": [*core_tests, *live_tests],
    }


# Maps a collection name to the ordered list of test categories it contains.
TEST_COLLECTION_MAPPING = _build_test_collection_mapping()
Loading