Commit c3b2628

Fix "'lm_eval.tasks' has no attribute 'initialize_tasks'" error (meta-llama#488)
2 parents: c113695 + a2a2ffd

2 files changed: 8 additions and 12 deletions

recipes/evaluation/README.md
Lines changed: 1 addition & 1 deletion

@@ -28,7 +28,7 @@ Before running the evaluation script, ensure you have all the necessary dependen
 Clone the lm-evaluation-harness repository and install it:
 
 ```bash
-git clone https://github.com/matthoffner/lm-evaluation-harness.git
+git clone https://github.com/EleutherAI/lm-evaluation-harness.git
 cd lm-evaluation-harness
 pip install -e .
 

recipes/evaluation/eval.py
Lines changed: 7 additions & 11 deletions

@@ -11,7 +11,7 @@
 
 import numpy as np
 import lm_eval
-from lm_eval import evaluator, tasks
+from lm_eval import tasks
 from lm_eval.utils import make_table
 
 
@@ -73,20 +73,19 @@ def handle_output(args, results, logger):
 
 
 def load_tasks(args):
-    tasks.initialize_tasks()
     if args.open_llm_leaderboard_tasks:
         current_dir = os.getcwd()
         config_dir = os.path.join(current_dir, "open_llm_leaderboard")
-        lm_eval.tasks.include_path(config_dir)
-        return [
+        task_manager = tasks.TaskManager(include_path=config_dir)
+        return task_manager, [
             "arc_challenge_25_shot",
             "hellaswag_10_shot",
             "truthfulqa_mc2",
             "winogrande_5_shot",
             "gsm8k",
             "mmlu",
         ]
-    return args.tasks.split(",") if args.tasks else []
+    return None, args.tasks.split(",") if args.tasks else []
 
 
 def parse_eval_args():
@@ -190,21 +189,18 @@ def parse_eval_args():
         default=None,
         help="Additional path to include if there are external tasks.",
     )
-    parser.add_argument(
-        "--decontamination_ngrams_path", default=None
-    )  # Not currently used
     return parser.parse_args()
 
 
 def evaluate_model(args):
     try:
-        task_list = load_tasks(args)
+        task_manager, task_list = load_tasks(args)
         # Customized model such as Quantized model etc.
         # In case you are working with a custom model, you can use the following guide to add it here:
         # https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/interface.md#external-library-usage
 
         # Evaluate
-        results = evaluator.simple_evaluate(
+        results = lm_eval.simple_evaluate(
             model=args.model,
             model_args=args.model_args,
             tasks=task_list,
@@ -214,11 +210,11 @@ def evaluate_model(args):
             device=args.device,
             use_cache=args.use_cache,
             limit=args.limit,
-            decontamination_ngrams_path=args.decontamination_ngrams_path,
             check_integrity=args.check_integrity,
             write_out=args.write_out,
             log_samples=args.log_samples,
             gen_kwargs=args.gen_kwargs,
+            task_manager=task_manager,
         )
         handle_output(args, results, logger)
 
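For context, here is a minimal standalone sketch of the post-0.4 lm-evaluation-harness API that this commit migrates to: a `tasks.TaskManager` instance replaces the removed `tasks.initialize_tasks()` call, and the manager is passed explicitly to `lm_eval.simple_evaluate`. This is not part of the commit; the model name and task below are illustrative placeholders.

```python
# Sketch of the new lm-eval API (assumes lm-eval-harness >= 0.4 installed).
import lm_eval
from lm_eval import tasks
from lm_eval.utils import make_table

# TaskManager replaces the removed tasks.initialize_tasks();
# include_path may point at a directory of custom task YAML configs, or be None.
task_manager = tasks.TaskManager(include_path=None)

results = lm_eval.simple_evaluate(
    model="hf",                                         # HuggingFace backend
    model_args="pretrained=meta-llama/Llama-2-7b-hf",   # illustrative checkpoint
    tasks=["hellaswag"],                                # illustrative task
    num_fewshot=0,
    task_manager=task_manager,                          # passed explicitly now
)
print(make_table(results))
```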