Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions evals/elsuite/track_the_stat/scripts/make_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def zero_if_none(input_num):
MODELS = [
"gpt-4-0125-preview",
"gpt-4-base",
"gpt-4o",
"gpt-3.5-turbo-0125",
"gemini-pro-1.0",
"mixtral-8x7b-instruct",
Expand All @@ -32,6 +33,7 @@ def zero_if_none(input_num):
"gpt-4-0125-preview",
"gpt-3.5-turbo-0125",
"gpt-4-base",
"gpt-4o",
]

STAT_TO_LABEL = {
Expand All @@ -54,6 +56,8 @@ def get_model(spec):
return "gpt-3.5-turbo-0125"
elif "gpt-4-base" in spec["completion_fns"][0]:
return "gpt-4-base"
elif "gpt-4o" in spec["completion_fns"][0]:
return "gpt-4o"
elif "gemini-pro" in spec["completion_fns"][0]:
return "gemini-pro-1.0"
elif "mixtral-8x7b-instruct" in spec["completion_fns"][0]:
Expand Down
4 changes: 3 additions & 1 deletion evals/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def n_ctx_from_model_name(model_name: str) -> Optional[int]:
("gpt-3.5-turbo-", 4096),
("gpt-4-32k-", 32768),
("gpt-4-", 8192),
("gpt-4o", 128_000),
]
MODEL_NAME_TO_N_CTX: dict[str, int] = {
"ada": 2048,
Expand All @@ -65,6 +66,7 @@ def n_ctx_from_model_name(model_name: str) -> Optional[int]:
"gpt-4-1106-preview": 128_000,
"gpt-4-turbo-preview": 128_000,
"gpt-4-0125-preview": 128_000,
"gpt-4o": 128_000
}

# first, look for an exact match
Expand All @@ -84,7 +86,7 @@ def is_chat_model(model_name: str) -> bool:
if model_name in {"gpt-4-base"} or model_name.startswith("gpt-3.5-turbo-instruct"):
return False

CHAT_MODEL_NAMES = {"gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4", "gpt-4-32k"}
CHAT_MODEL_NAMES = {"gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4", "gpt-4-32k", "gpt-4o"}

if model_name in CHAT_MODEL_NAMES:
return True
Expand Down