Skip to content

Commit fd78adf

Browse files
committed
chore: update tests to include system prompt in MMLU evals
Signed-off-by: Oleg S <[email protected]>
1 parent ab664b8 commit fd78adf

File tree

2 files changed: 16 additions and 3 deletions.

scripts/test_mmlu.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,19 @@
11
# First Party
22
from instructlab.eval.mmlu import MMLUEvaluator
33

4+
SYSTEM_PROMPT = """I am, Red Hat® Instruct Model based on Granite 7B, an AI language model developed by Red Hat and IBM Research, based on the Granite-7b-base language model. My primary function is to be a chat assistant."""
5+
46

57
def test_minimal_mmlu():
68
print("===> Executing 'test_minimal_mmlu'...")
79
try:
810
model_path = "instructlab/granite-7b-lab"
911
tasks = ["mmlu_anatomy", "mmlu_astronomy"]
10-
mmlu = MMLUEvaluator(model_path=model_path, tasks=tasks)
12+
mmlu = MMLUEvaluator(
13+
model_path=model_path,
14+
tasks=tasks,
15+
system_prompt=SYSTEM_PROMPT,
16+
)
1117
overall_score, individual_scores = mmlu.run()
1218
print(overall_score)
1319
print(individual_scores)

tests/test_mmlu.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,10 @@ def test_mmlu_branch(eval_mock):
4848
tasks_dir = f"{os.path.dirname(os.path.realpath(__file__))}/testdata/sdg"
4949
tasks = ["mmlu_pr"]
5050
mmlu = MMLUBranchEvaluator(
51-
model_path=MODEL_EXAMPLE, tasks_dir=tasks_dir, tasks=tasks
51+
model_path=MODEL_EXAMPLE,
52+
tasks_dir=tasks_dir,
53+
tasks=tasks,
54+
system_prompt="You are an intelligent AI language model.",
5255
)
5356
overall_score, individual_scores = mmlu.run()
5457

@@ -62,7 +65,11 @@ def test_mmlu_branch(eval_mock):
6265
)
6366
def test_mmlu(eval_mock):
6467
tasks = ["mmlu_anatomy", "mmlu_astronomy", "mmlu_algebra"]
65-
mmlu = MMLUEvaluator(model_path=MODEL_EXAMPLE, tasks=tasks)
68+
mmlu = MMLUEvaluator(
69+
model_path=MODEL_EXAMPLE,
70+
tasks=tasks,
71+
system_prompt="You are an intelligent AI language model.",
72+
)
6673
overall_score, individual_scores = mmlu.run()
6774

6875
eval_mock.assert_called()

0 commit comments

Comments (0)