Skip to content

Commit c7d07de

Browse files
authored
Fix eval + add smoke test (axolotl-ai-cloud#2586)
* fix evaluate CLI
* add smoke test
* fix naming
* lint
1 parent 6565ae8 commit c7d07de

File tree

3 files changed

+83
-26
lines changed

3 files changed

+83
-26
lines changed

src/axolotl/cli/evaluate.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""CLI to run evaluation on a model."""
22

33
import logging
4+
import os
45
from pathlib import Path
56
from typing import Union
67

@@ -14,6 +15,7 @@
1415
from axolotl.cli.config import load_cfg
1516
from axolotl.common.datasets import load_datasets, load_preference_datasets
1617
from axolotl.evaluate import evaluate
18+
from axolotl.utils import set_pytorch_cuda_alloc_conf
1719
from axolotl.utils.dict import DictDefault
1820

1921
LOG = logging.getLogger(__name__)
@@ -29,10 +31,14 @@ def do_evaluate(cfg: DictDefault, cli_args: TrainerCliArgs) -> None:
2931
cfg: Dictionary mapping `axolotl` config keys to values.
3032
cli_args: CLI arguments.
3133
"""
34+
# Enable expandable segments for cuda allocation to improve VRAM usage
35+
set_pytorch_cuda_alloc_conf()
36+
3237
# pylint: disable=duplicate-code
3338
print_axolotl_text_art()
3439
check_accelerate_default_config()
35-
check_user_token()
40+
if int(os.getenv("LOCAL_RANK", "0")) == 0:
41+
check_user_token()
3642

3743
if cfg.rl:
3844
dataset_meta = load_preference_datasets(cfg=cfg, cli_args=cli_args)

src/axolotl/evaluate.py

Lines changed: 11 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -12,19 +12,20 @@
1212
from transformers.trainer import Trainer
1313

1414
from axolotl.logging_config import configure_logging
15-
from axolotl.train import TrainDatasetMeta
16-
from axolotl.utils import set_pytorch_cuda_alloc_conf
15+
from axolotl.train import (
16+
TrainDatasetMeta,
17+
setup_model_and_tokenizer,
18+
)
1719
from axolotl.utils.dict import DictDefault
1820
from axolotl.utils.distributed import cleanup_distributed
19-
from axolotl.utils.models import load_model, load_processor, load_tokenizer
2021
from axolotl.utils.trainer import setup_trainer
2122

2223
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
2324
src_dir = os.path.join(project_root, "src")
2425
sys.path.insert(0, src_dir)
2526

2627
configure_logging()
27-
LOG = get_logger("axolotl.evaluate")
28+
LOG = get_logger(__name__)
2829

2930

3031
def evaluate_dataset(
@@ -75,37 +76,22 @@ def evaluate(*, cfg: DictDefault, dataset_meta: TrainDatasetMeta) -> Dict[str, f
7576
Returns:
7677
Dictionary mapping metric names to their values.
7778
"""
78-
# pylint: disable=duplicate-code
79-
# Enable expandable segments for cuda allocation to improve VRAM usage
80-
set_pytorch_cuda_alloc_conf()
81-
82-
# Load tokenizer
83-
LOG.debug(
84-
f"loading tokenizer... {cfg.tokenizer_config or cfg.base_model_config}",
85-
main_process_only=True,
86-
)
87-
tokenizer = load_tokenizer(cfg)
88-
89-
# Load processor for multimodal models if needed
90-
processor = None
91-
if cfg.is_multimodal:
92-
processor = load_processor(cfg, tokenizer)
79+
# Load tokenizer, processor and model
80+
LOG.debug("loading model for evaluation...")
81+
model, tokenizer, _, processor = setup_model_and_tokenizer(cfg)
9382

9483
# Get datasets
84+
# pylint: disable=duplicate-code
9585
train_dataset = dataset_meta.train_dataset
9686
eval_dataset = dataset_meta.eval_dataset
9787
total_num_steps = dataset_meta.total_num_steps
9888

99-
# Load model
100-
LOG.debug("loading model for evaluation...")
101-
model, _ = load_model(cfg, tokenizer, processor=processor)
102-
10389
# Set up trainer
10490
trainer = setup_trainer(
105-
cfg,
91+
cfg=cfg,
10692
train_dataset=train_dataset,
10793
eval_dataset=eval_dataset,
108-
model=(model, None, None), # No need for model_ref or peft_config
94+
model=model,
10995
tokenizer=tokenizer,
11096
processor=processor,
11197
total_num_steps=total_num_steps,

tests/e2e/test_evaluate.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
"""E2E smoke test for evaluate CLI command"""
2+
3+
import os
4+
from pathlib import Path
5+
6+
import yaml
7+
from accelerate.test_utils import execute_subprocess_async
8+
from transformers.testing_utils import get_torch_dist_unique_port
9+
10+
from axolotl.utils.dict import DictDefault
11+
12+
os.environ["WANDB_DISABLED"] = "true"
13+
14+
15+
class TestE2eEvaluate:
    """End-to-end smoke coverage for the ``axolotl.cli.evaluate`` entry point."""

    def test_evaluate(self, temp_dir):
        """Dump a small config into ``temp_dir`` and run the evaluate CLI on it.

        The CLI module is started through ``accelerate launch`` with two
        processes, using the YAML file written by this test as its only
        positional argument.
        """
        # pylint: disable=duplicate-code
        settings = {
            "base_model": "JackFram/llama-68m",
            "tokenizer_type": "LlamaTokenizer",
            "sequence_len": 1024,
            "val_set_size": 0.02,
            "special_tokens": {
                "unk_token": "<unk>",
                "bos_token": "<s>",
                "eos_token": "</s>",
            },
            "datasets": [
                {
                    "path": "mhenrichsen/alpaca_2k_test",
                    "type": "alpaca",
                },
            ],
            "num_epochs": 1,
            "micro_batch_size": 8,
            "gradient_accumulation_steps": 1,
            "output_dir": temp_dir,
            "learning_rate": 0.00001,
            "optimizer": "adamw_torch_fused",
            "lr_scheduler": "cosine",
            "max_steps": 20,
        }
        cfg = DictDefault(settings)

        # Persist the config as YAML so the CLI can load it from disk.
        workdir = Path(temp_dir)
        workdir.mkdir(parents=True, exist_ok=True)
        config_path = workdir / "config.yaml"
        with open(config_path, "w", encoding="utf-8") as fout:
            serialized = yaml.dump(cfg.to_dict(), Dumper=yaml.Dumper)
            fout.write(serialized)

        # Ask for a fresh port for this launch.
        port = get_torch_dist_unique_port()
        command = [
            "accelerate",
            "launch",
            "--num-processes",
            "2",
            "--main_process_port",
            str(port),
            "-m",
            "axolotl.cli.evaluate",
            str(config_path),
        ]
        execute_subprocess_async(command)

0 commit comments

Comments
 (0)