47 changes: 37 additions & 10 deletions lm_eval/models/huggingface.py
@@ -1,7 +1,9 @@
import os
import math
import torch
import torch.nn.functional as F
import transformers
import deepspeed
from typing import List, Mapping, NewType, Optional, Tuple, Union
from tqdm import tqdm

@@ -77,6 +79,7 @@ def __init__(
offload_folder: Optional[str] = "./offload",
dtype: Optional[Union[str, torch.dtype]] = None,
device: Optional[Union[int, str]] = "cuda",
use_deepspeed: Optional[bool] = False
):
"""Initializes a HuggingFace `AutoModel` and `AutoTokenizer` for evaluation.

@@ -147,6 +150,7 @@ def __init__(
self._batch_size = batch_size # TODO: Adaptive batch size
self._max_gen_toks = max_gen_toks
self._max_length = max_length
self.use_deepspeed = use_deepspeed
self._config = self.AUTO_CONFIG_CLASS.from_pretrained(
pretrained,
revision=revision + ("/" + subfolder if subfolder is not None else ""),
@@ -185,7 +189,7 @@ def __init__(
# the user specified one so we force `self._device` to be the same as
# `lm_head`'s.
self._device = self.model.hf_device_map["lm_head"]
if not use_accelerate:
if not use_accelerate and not use_deepspeed:
self.model.to(self._device)

def _create_auto_model(
@@ -199,15 +203,38 @@ def _create_auto_model(
offload_folder: Optional[str] = None,
torch_dtype: Optional[Union[str, torch.dtype]] = None,
) -> transformers.AutoModel:
"""Returns a pre-trained pytorch model from a pre-trained model configuration."""
model = self.AUTO_MODEL_CLASS.from_pretrained(
pretrained,
revision=revision + ("/" + subfolder if subfolder is not None else ""),
device_map=device_map,
max_memory=max_memory,
offload_folder=offload_folder,
torch_dtype=torch_dtype,
)
if self.use_deepspeed:
    # DeepSpeed initialization: expects launch via a distributed launcher
    # so that WORLD_SIZE is set in the environment.
    world_size = int(os.getenv("WORLD_SIZE", "1"))
    deepspeed.init_distributed("nccl")

    # Instantiate on the meta device so no weight memory is allocated here;
    # DeepSpeed materializes the weights from the checkpoint below. The dtype
    # is kept consistent between the OnDevice context and from_config.
    with deepspeed.OnDevice(dtype=torch.float16, device="meta"):
        model = self.AUTO_MODEL_CLASS.from_config(self._config, torch_dtype=torch.float16)

    model = model.eval()
    # Checkpoint description file expected alongside the pre-sharded weights.
    checkpoints_json = os.path.join(pretrained, "ds_inference_config.json")
    tp_config = deepspeed.inference.config.DeepSpeedTPConfig()
    tp_config.tp_size = world_size

    model = deepspeed.init_inference(
        model,
        tensor_parallel=tp_config,
        base_dir=pretrained,
        dtype=torch_dtype,
        checkpoint=checkpoints_json,
        replace_with_kernel_inject=True,
    )
else:
    # Returns a pre-trained pytorch model from a pre-trained model configuration.
    model = self.AUTO_MODEL_CLASS.from_pretrained(
        pretrained,
        revision=revision + ("/" + subfolder if subfolder is not None else ""),
        device_map=device_map,
        max_memory=max_memory,
        offload_folder=offload_folder,
        torch_dtype=torch_dtype,
    )
return model
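
For context, here is a minimal, self-contained sketch of the DeepSpeed tensor-parallel initialization pattern used above. It is illustrative only: it loads real weights with `from_pretrained` rather than the PR's meta-device path (which additionally requires the pre-sharded `ds_inference_config.json` checkpoint), and it assumes the process was started by the `deepspeed` launcher so that `WORLD_SIZE` is set.

```python
# Illustrative sketch (not part of this PR): DeepSpeed tensor-parallel
# inference with kernel injection. Run with: deepspeed --num_gpus 2 sketch.py
import os

import deepspeed
import torch
import transformers

world_size = int(os.getenv("WORLD_SIZE", "1"))
deepspeed.init_distributed("nccl")

# Load real weights here; the PR instead builds the model on the meta device
# and lets init_inference materialize weights from a pre-sharded checkpoint.
model = transformers.AutoModelForCausalLM.from_pretrained(
    "gpt2", torch_dtype=torch.float16
)

tp_config = deepspeed.inference.config.DeepSpeedTPConfig()
tp_config.tp_size = world_size  # shard attention/MLP matrices across ranks

engine = deepspeed.init_inference(
    model.eval(),
    tensor_parallel=tp_config,
    dtype=torch.float16,
    replace_with_kernel_inject=True,  # swap in DeepSpeed's fused kernels
)
# engine.module now runs forward passes tensor-parallel across all ranks.
```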

def _create_auto_tokenizer(
4 changes: 4 additions & 0 deletions lm_eval/tasks/__init__.py
@@ -35,6 +35,7 @@
from . import tydiqa
from . import wino_bias
from . import wmt
from . import xnli
from . import xquad


@@ -147,6 +148,9 @@
# Bias-Shades
# Format: `bias_shades_{lang}`
**bias_shades.construct_tasks(),
# XNLI
# Format: `xnli_{lang}`
**xnli.construct_tasks(),
# BLiMP
"blimp_adjunct_island": blimp.BlimpAdjunctIsland,
"blimp_anaphor_gender_agreement": blimp.BlimpAnaphorGenderAgreement,
74 changes: 74 additions & 0 deletions lm_eval/tasks/xnli.py
@@ -0,0 +1,74 @@
"""
XNLI is an evaluation corpus for language transfer and cross-lingual sentence classification in 15 languages.
https://arxiv.org/abs/1809.05053
Homepage: None, Repo: https://github.com/facebookresearch/XNLI
"""
import typing

from lm_eval.api.task import PromptSourceTask


_CITATION = """
@inproceedings{conneau2018xnli,
title={XNLI: Evaluating Cross-lingual Sentence Representations},
author={Conneau, Alexis and Rinott, Ruty and Lample, Guillaume and Williams, Adina and Bowman, Samuel and Schwenk, Holger and Stoyanov, Veselin},
booktitle={Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
pages={2475--2485},
year={2018}
}
}"""


class XNLI(PromptSourceTask):
VERSION = 1
DATASET_PATH = "xnli"
DATASET_NAME = None

def has_training_docs(self):
return True

def has_validation_docs(self):
return True

def has_test_docs(self):
return True

def training_docs(self):
if self.has_training_docs():
return self.dataset["train"]

def validation_docs(self):
if self.has_validation_docs():
return self.dataset["validation"]

def test_docs(self):
if self.has_test_docs():
return self.dataset["test"]


class XNLIEn(XNLI):
DATASET_NAME = "en"


class XNLIFr(XNLI):
DATASET_NAME = "fr"


XNLI_TASKS = [
XNLIEn,
XNLIFr,
]


def construct_tasks() -> typing.Dict[str, XNLI]:
"""
Returns a dictionary of tasks keyed by task name, for example:
"xnli_en"
will dispatch to the XNLI English class.
"""
tasks = {}
for task_class in XNLI_TASKS:
benchmark = task_class.DATASET_PATH
lang = task_class.DATASET_NAME
tasks[f"{benchmark}_{lang}"] = task_class
return tasks
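
A quick usage sketch of the naming scheme `construct_tasks` produces (hypothetical snippet, not part of the PR):

```python
# The registry in lm_eval/tasks/__init__.py merges this mapping, so the new
# tasks become addressable by name.
from lm_eval.tasks import xnli

tasks = xnli.construct_tasks()
print(sorted(tasks))                  # ['xnli_en', 'xnli_fr']
print(tasks["xnli_en"].DATASET_NAME)  # 'en'
```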
12 changes: 10 additions & 2 deletions main.py
@@ -103,6 +103,13 @@ def parse_args():
action="store_true",
help="Whether to cache your model's predictions or not",
)
# Needed for DeepSpeed inference: distributed launchers pass
# --local_rank to every spawned process.
parser.add_argument(
    "--local_rank",
    default=0,
    type=int,
    help="Local process rank, set automatically by distributed launchers",
)
return parser.parse_args()
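
The flag matters because distributed launchers (including `deepspeed`) append `--local_rank=<rank>` to each spawned process's argv; a parser that does not accept it would crash under multi-GPU launches. A minimal illustration (hypothetical, not part of the PR):

```python
# Simulate the argv a launcher passes to, say, local rank 3.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--local_rank", default=0, type=int)
args = parser.parse_args(["--local_rank=3"])
assert args.local_rank == 3  # single-process runs keep the default of 0
```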


@@ -204,9 +211,10 @@ def main():
with open(f"./outputs/slim{path_separator}{output_path}.json", "w") as f:
json.dump(agg2slim(results), f, indent=2)

print(f"\n{evaluator.make_table(results)}")
if args.local_rank == 0:
print(f"\n{evaluator.make_table(results)}")

if not args.no_tracking:
if not args.no_tracking and args.local_rank == 0:
emissions_output_path = f"./outputs/emissions{path_separator}{output_path}.csv"
os.rename("emissions.csv", emissions_output_path)
