diff --git a/lm_eval/models/huggingface.py b/lm_eval/models/huggingface.py
index 16af71806ad..07566bfb28e 100644
--- a/lm_eval/models/huggingface.py
+++ b/lm_eval/models/huggingface.py
@@ -1,7 +1,8 @@
+import os
 import math
 import torch
 import torch.nn.functional as F
 import transformers
 from typing import List, Mapping, NewType, Optional, Tuple, Union
 from tqdm import tqdm
 
@@ -77,6 +78,7 @@ def __init__(
         offload_folder: Optional[str] = "./offload",
         dtype: Optional[Union[str, torch.dtype]] = None,
         device: Optional[Union[int, str]] = "cuda",
+        use_deepspeed: Optional[bool] = False,
     ):
         """Initializes a HuggingFace `AutoModel` and `AutoTokenizer` for evaluation.
 
@@ -147,6 +149,7 @@ def __init__(
         self._batch_size = batch_size  # TODO: Adaptive batch size
         self._max_gen_toks = max_gen_toks
         self._max_length = max_length
+        self.use_deepspeed = use_deepspeed
         self._config = self.AUTO_CONFIG_CLASS.from_pretrained(
             pretrained,
             revision=revision + ("/" + subfolder if subfolder is not None else ""),
@@ -185,7 +188,7 @@ def __init__(
             # the user specified one so we force `self._device` to be the same as
             # `lm_head`'s.
             self._device = self.model.hf_device_map["lm_head"]
-        if not use_accelerate:
+        if not use_accelerate and not use_deepspeed:
             self.model.to(self._device)
 
     def _create_auto_model(
@@ -199,15 +202,40 @@ def _create_auto_model(
         offload_folder: Optional[str] = None,
         torch_dtype: Optional[Union[str, torch.dtype]] = None,
     ) -> transformers.AutoModel:
         """Returns a pre-trained pytorch model from a pre-trained model configuration."""
-        model = self.AUTO_MODEL_CLASS.from_pretrained(
-            pretrained,
-            revision=revision + ("/" + subfolder if subfolder is not None else ""),
-            device_map=device_map,
-            max_memory=max_memory,
-            offload_folder=offload_folder,
-            torch_dtype=torch_dtype,
-        )
+        if self.use_deepspeed:
+            # Imported lazily so DeepSpeed stays an optional dependency.
+            import deepspeed
+
+            # DeepSpeed initialization; the tensor-parallel size follows WORLD_SIZE.
+            world_size = int(os.getenv("WORLD_SIZE", "1"))
+            deepspeed.init_distributed("nccl")
+
+            with deepspeed.OnDevice(dtype=torch.float16, device="meta"):
+                model = self.AUTO_MODEL_CLASS.from_config(self._config, torch_dtype=torch.bfloat16)
+
+            model = model.eval()
+            checkpoints_json = os.path.join(pretrained, "ds_inference_config.json")
+            tp_config = deepspeed.inference.config.DeepSpeedTPConfig()
+            tp_config.tp_size = world_size
+
+            model = deepspeed.init_inference(
+                model,
+                tensor_parallel=tp_config,
+                base_dir=pretrained,
+                dtype=torch_dtype,
+                checkpoint=checkpoints_json,
+                replace_with_kernel_inject=True,
+            )
+        else:
+            model = self.AUTO_MODEL_CLASS.from_pretrained(
+                pretrained,
+                revision=revision + ("/" + subfolder if subfolder is not None else ""),
+                device_map=device_map,
+                max_memory=max_memory,
+                offload_folder=offload_folder,
+                torch_dtype=torch_dtype,
+            )
         return model
 
     def _create_auto_tokenizer(
diff --git a/lm_eval/tasks/__init__.py b/lm_eval/tasks/__init__.py
index 03d24e52d82..b9c9773db9d 100644
--- a/lm_eval/tasks/__init__.py
+++ b/lm_eval/tasks/__init__.py
@@ -35,6 +35,7 @@
 from . import tydiqa
 from . import wino_bias
 from . import wmt
+from . import xnli
 from . import xquad
 
 
@@ -147,6 +148,9 @@
     # Bias-Shades
     # Format: `bias_shades_{lang}`
     **bias_shades.construct_tasks(),
+    # XNLI
+    # Format: `xnli_{lang}`
+    **xnli.construct_tasks(),
     # BLiMP
     "blimp_adjunct_island": blimp.BlimpAdjunctIsland,
     "blimp_anaphor_gender_agreement": blimp.BlimpAnaphorGenderAgreement,
diff --git a/lm_eval/tasks/xnli.py b/lm_eval/tasks/xnli.py
new file mode 100644
index 00000000000..e54139573da
--- /dev/null
+++ b/lm_eval/tasks/xnli.py
@@ -0,0 +1,75 @@
+"""
+XNLI is an evaluation corpus for language transfer and cross-lingual sentence classification in 15 languages.
+https://arxiv.org/abs/1809.05053
+Homepage: None, Repo: https://github.com/facebookresearch/XNLI
+"""
+import typing
+
+from lm_eval.api.task import PromptSourceTask
+
+
+_CITATION = """
+@inproceedings{conneau2018xnli,
+    title={XNLI: Evaluating Cross-lingual Sentence Representations},
+    author={Conneau, Alexis and Rinott, Ruty and Lample, Guillaume and Williams, Adina and Bowman, Samuel and Schwenk, Holger and Stoyanov, Veselin},
+    booktitle={Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
+    pages={2475--2485},
+    year={2018}
+}
+"""
+
+
+class XNLI(PromptSourceTask):
+    """Base PromptSource task for XNLI; subclasses set `DATASET_NAME` to a language code."""
+    VERSION = 1
+    DATASET_PATH = "xnli"
+    DATASET_NAME = None
+
+    def has_training_docs(self):
+        return True
+
+    def has_validation_docs(self):
+        return True
+
+    def has_test_docs(self):
+        return True
+
+    def training_docs(self):
+        if self.has_training_docs():
+            return self.dataset["train"]
+
+    def validation_docs(self):
+        if self.has_validation_docs():
+            return self.dataset["validation"]
+
+    def test_docs(self):
+        if self.has_test_docs():
+            return self.dataset["test"]
+
+
+class XNLIEn(XNLI):
+    DATASET_NAME = "en"
+
+
+class XNLIFr(XNLI):
+    DATASET_NAME = "fr"
+
+
+XNLI_TASKS = [
+    XNLIEn,
+    XNLIFr,
+]
+
+
+def construct_tasks() -> typing.Dict[str, typing.Type[XNLI]]:
+    """
+    Returns a dictionary of task classes keyed by task name, for example:
+        "xnli_en"
+    will dispatch to the XNLIEn class.
+    """
+    tasks = {}
+    for task_class in XNLI_TASKS:
+        benchmark = task_class.DATASET_PATH
+        lang = task_class.DATASET_NAME
+        tasks[f"{benchmark}_{lang}"] = task_class
+    return tasks
diff --git a/main.py b/main.py
index ffd8633e710..d02fe5a1013 100644
--- a/main.py
+++ b/main.py
@@ -103,6 +103,13 @@ def parse_args():
         action="store_true",
         help="Whether to cache your model's predictions or not",
     )
+    # Needed for DeepSpeed inference; supplied by distributed launchers.
+    parser.add_argument(
+        "--local_rank",
+        default=0,
+        type=int,
+        help="used by dist launchers"
+    )
     return parser.parse_args()
 
 
@@ -204,9 +211,10 @@ def main():
     with open(f"./outputs/slim{path_separator}{output_path}.json", "w") as f:
         json.dump(agg2slim(results), f, indent=2)
 
-    print(f"\n{evaluator.make_table(results)}")
+    if args.local_rank == 0:
+        print(f"\n{evaluator.make_table(results)}")
 
-    if not args.no_tracking:
+    if not args.no_tracking and args.local_rank == 0:
         emissions_output_path = f"./outputs/emissions{path_separator}{output_path}.csv"
         os.rename("emissions.csv", emissions_output_path)
 