4 changes: 4 additions & 0 deletions spacy_llm/models/hf/__init__.py
@@ -3,7 +3,9 @@
from .falcon import falcon_hf
from .llama2 import llama2_hf
from .mistral import mistral_hf
from .mixtral import mixtral_hf
from .openllama import openllama_hf
from .phi2 import phi2_hf
from .stablelm import stablelm_hf

__all__ = [
@@ -12,6 +14,8 @@
"falcon_hf",
"llama2_hf",
"mistral_hf",
"mixtral_hf",
"openllama_hf",
"phi2_hf",
"stablelm_hf",
]
8 changes: 8 additions & 0 deletions spacy_llm/models/hf/base.py
@@ -69,6 +69,14 @@ def __init__(
f"Double-check you specified a valid dtype."
) from ex

        # Coerce stringified booleans ("True"/"False") in the configs to real bools.
        for key, value in self._config_init.items():
            if value in ("True", "False"):
                self._config_init[key] = value == "True"
        for key, value in self._config_run.items():
            if value in ("True", "False"):
                self._config_run[key] = value == "True"

# Init HF model.
HuggingFace.check_installation()
self._check_model()
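The new block above coerces stringified booleans coming in through `config_init`/`config_run` into real bools before they reach Hugging Face. A minimal sketch of the effect, using a plain dict in place of `self._config_init` (the keys are illustrative HF options, not values this change requires):

config_init = {"trust_remote_code": "True", "low_cpu_mem_usage": "False", "torch_dtype": "auto"}
for key, value in config_init.items():
    # Only exact "True"/"False" strings are coerced; other values pass through untouched.
    if value in ("True", "False"):
        config_init[key] = value == "True"
assert config_init == {"trust_remote_code": True, "low_cpu_mem_usage": False, "torch_dtype": "auto"}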
7 changes: 3 additions & 4 deletions spacy_llm/models/hf/mistral.py
@@ -65,7 +65,7 @@ def __call__(self, prompts: Iterable[Iterable[str]]) -> Iterable[Iterable[str]]:

tokenized_input_ids = [
self._tokenizer(
prompt if not self._is_instruct else f"<s>[INST] {prompt} [/INST]",
prompt if not self._is_instruct else f"[INST] {prompt} [/INST]",
return_tensors="pt",
).input_ids
for prompt in prompts_for_doc
@@ -96,11 +96,10 @@ def mistral_hf(
config_run: Optional[Dict[str, Any]] = SimpleFrozenDict(),
) -> Callable[[Iterable[Iterable[str]]], Iterable[Iterable[str]]]:
"""Generates Mistral instance that can execute a set of prompts and return the raw responses.
name (Literal): Name of the Falcon model. Has to be one of Falcon.get_model_names().
    name (Literal): Name of the Mistral model. Has to be one of Mistral.get_model_names().
config_init (Optional[Dict[str, Any]]): HF config for initializing the model.
config_run (Optional[Dict[str, Any]]): HF config for running the model.
RETURNS (Callable[[Iterable[str]], Iterable[str]]): Falcon instance that can execute a set of prompts and return
the raw responses.
RETURNS (Mistral): Mistral instance that can execute a set of prompts and return the raw responses.
"""
return Mistral(
name=name, config_init=config_init, config_run=config_run, context_length=8000
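Context for dropping the literal `<s>`: the Mistral tokenizer already prepends a BOS token when encoding, so hard-coding it in the prompt string would add it twice. A quick check, as a sketch only; it assumes access to the `mistralai/Mistral-7B-Instruct-v0.1` tokenizer with its default `add_bos_token=True` setting:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
ids_with_tag = tok("<s>[INST] Hi [/INST]").input_ids
ids_without_tag = tok("[INST] Hi [/INST]").input_ids
# Expectation: the first list starts with two BOS ids, the second with one.
print(tok.bos_token_id, ids_with_tag[:3], ids_without_tag[:3])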
108 changes: 108 additions & 0 deletions spacy_llm/models/hf/mixtral.py
@@ -0,0 +1,108 @@
from typing import Any, Callable, Dict, Iterable, List, Optional

from confection import SimpleFrozenDict

from ...compat import Literal, transformers
from ...registry.util import registry
from .base import HuggingFace


class Mixtral(HuggingFace):
MODEL_NAMES = Literal[
"Mixtral-8x7B-v0.1", "Mixtral-8x7B-Instruct-v0.1"
] # noqa: F722

def __init__(
self,
name: MODEL_NAMES,
config_init: Optional[Dict[str, Any]],
config_run: Optional[Dict[str, Any]],
context_length: Optional[int],
):
self._tokenizer: Optional["transformers.AutoTokenizer"] = None
self._is_instruct = "instruct" in name
super().__init__(
name=name,
config_init=config_init,
config_run=config_run,
context_length=context_length,
)

assert isinstance(self._tokenizer, transformers.PreTrainedTokenizerBase)

# Instantiate GenerationConfig object from config dict.
self._hf_config_run = transformers.GenerationConfig.from_pretrained(
self._name, **self._config_run
)
# To avoid deprecation warning regarding usage of `max_length`.
self._hf_config_run.max_new_tokens = self._hf_config_run.max_length

def init_model(self) -> Any:
self._tokenizer = transformers.AutoTokenizer.from_pretrained(self._name)
init_cfg = self._config_init
device: Optional[str] = None
if "device" in init_cfg:
device = init_cfg.pop("device")

model = transformers.AutoModelForCausalLM.from_pretrained(
self._name, **init_cfg, resume_download=True
)
if device:
model.to(device)

return model

@property
def hf_account(self) -> str:
return "mistralai"

def __call__(self, prompts: Iterable[Iterable[str]]) -> Iterable[Iterable[str]]: # type: ignore[override]
assert callable(self._tokenizer)
assert hasattr(self._model, "generate")
assert hasattr(self._tokenizer, "batch_decode")
responses: List[List[str]] = []

for prompts_for_doc in prompts:
prompts_for_doc = list(prompts_for_doc)

tokenized_input_ids = [
self._tokenizer(
prompt if not self._is_instruct else f"[INST] {prompt} [/INST]",
return_tensors="pt",
).input_ids
for prompt in prompts_for_doc
]
tokenized_input_ids = [
tp.to(self._model.device) for tp in tokenized_input_ids
]

responses.append(
[
self._tokenizer.decode(
self._model.generate(
input_ids=tok_ii, generation_config=self._hf_config_run
)[:, tok_ii.shape[1] :][0],
skip_special_tokens=True,
)
for tok_ii in tokenized_input_ids
]
)

return responses


@registry.llm_models("spacy.Mixtral.v1")
def mixtral_hf(
name: Mixtral.MODEL_NAMES,
config_init: Optional[Dict[str, Any]] = SimpleFrozenDict(),
config_run: Optional[Dict[str, Any]] = SimpleFrozenDict(),
) -> Callable[[Iterable[Iterable[str]]], Iterable[Iterable[str]]]:
"""Generates Mixtral instance that can execute a set of prompts and return the raw responses.
name (Literal): Name of the Mixtral model. Has to be one of Mixtral.get_model_names().
config_init (Optional[Dict[str, Any]]): HF config for initializing the model.
config_run (Optional[Dict[str, Any]]): HF config for running the model.
RETURNS (Mixtral): Mixtral instance that can execute a set of prompts and return the raw responses.
"""
return Mixtral(
name=name, config_init=config_init, config_run=config_run, context_length=8000
)
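The registered factory returns a callable that takes prompts grouped per doc (an iterable of iterables) and returns responses with the same nesting. A minimal usage sketch, assuming the package is installed with its transformers extras and a GPU with enough memory for the 8x7B weights; the `device_map="auto"` init option is illustrative and simply passes through to `from_pretrained`:

from spacy_llm.models.hf import mixtral_hf

# One inner list per doc; each inner list holds the prompts for that doc.
mixtral = mixtral_hf(
    name="Mixtral-8x7B-Instruct-v0.1",
    config_init={"device_map": "auto"},
)
responses = mixtral([["What is the capital of France?"]])
print(list(responses)[0][0])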
115 changes: 115 additions & 0 deletions spacy_llm/models/hf/phi2.py
@@ -0,0 +1,115 @@
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple

from confection import SimpleFrozenDict

from ...compat import Literal, transformers
from ...registry.util import registry
from .base import HuggingFace


class Phi2(HuggingFace):
MODEL_NAMES = Literal["phi-2"] # noqa: F722

def __init__(
self,
name: str,
config_init: Optional[Dict[str, Any]],
config_run: Optional[Dict[str, Any]],
context_length: Optional[int],
):
self._tokenizer: Optional["transformers.AutoTokenizer"] = None
super().__init__(
name=name,
config_init=config_init,
config_run=config_run,
context_length=context_length,
)

def init_model(self) -> "transformers.AutoModelForCausalLM":
"""Sets up HF model and needed utilities.
RETURNS (Any): HF model.
"""
# Initialize tokenizer and model.
self._tokenizer = transformers.AutoTokenizer.from_pretrained(
self._name, trust_remote_code=True
)
init_cfg = self._config_init
device: Optional[str] = None
if "device" in init_cfg:
device = init_cfg.pop("device")

model = transformers.AutoModelForCausalLM.from_pretrained(
self._name, **init_cfg
)
if device:
model.to(device)

return model

def __call__(self, prompts: Iterable[Iterable[str]]) -> Iterable[Iterable[str]]: # type: ignore[override]
assert callable(self._tokenizer)
responses: List[List[str]] = []

for prompts_for_doc in prompts:
tokenized_input_ids = [
self._tokenizer(
prompt, return_tensors="pt", return_attention_mask=False
).input_ids
for prompt in prompts_for_doc
]
tokenized_input_ids = [
tii.to(self._model.device) for tii in tokenized_input_ids
]

assert hasattr(self._model, "generate")
responses.append(
[
self._tokenizer.decode(
self._model.generate(input_ids=tii, **self._config_run)[
:, tii.shape[1] :
][0],
)
for tii in tokenized_input_ids
]
)

return responses

@property
def hf_account(self) -> str:
return "microsoft"

@staticmethod
def compile_default_configs() -> Tuple[Dict[str, Any], Dict[str, Any]]:
# See https://huggingface.co/microsoft/phi-2#sample-code for recommended setting combinations.
default_cfg_init, default_cfg_run = HuggingFace.compile_default_configs()
return (
{
**default_cfg_init,
"torch_dtype": "auto",
"device_map": "cuda",
"trust_remote_code": True,
},
{
**default_cfg_run,
"max_new_tokens": 200,
},
)


@registry.llm_models("spacy.Phi-2.v1")
def phi2_hf(
name: Phi2.MODEL_NAMES,
config_init: Optional[Dict[str, Any]] = SimpleFrozenDict(),
config_run: Optional[Dict[str, Any]] = SimpleFrozenDict(),
) -> Callable[[Iterable[Iterable[str]]], Iterable[Iterable[str]]]:
"""Generates OpenLLaMA instance that can execute a set of prompts and return the raw responses.
name (Literal): Name of the OpenLLaMA model. Has to be one of OpenLLaMA.get_model_names().
config_init (Optional[Dict[str, Any]]): HF config for initializing the model.
config_run (Optional[Dict[str, Any]]): HF config for running the model.
RETURNS (Callable[[Iterable[str]], Iterable[str]]): OpenLLaMA instance that can execute a set of prompts and return
the raw responses.
"""
return Phi2(
name=name, config_init=config_init, config_run=config_run, context_length=2048
)
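A minimal usage sketch for the new `spacy.Phi-2.v1` registration, mirroring the Mixtral test below; it assumes spacy-llm is installed so the `llm` factory is available, and that a CUDA device is present, since the default init config maps the model to "cuda":

import spacy

nlp = spacy.blank("en")
nlp.add_pipe(
    "llm",
    config={
        "model": {"@llm_models": "spacy.Phi-2.v1", "name": "phi-2"},
        "task": {"@llm_tasks": "spacy.NoOp.v1"},
    },
)
doc = nlp("This is a test.")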
70 changes: 70 additions & 0 deletions spacy_llm/tests/models/test_mixtral.py
@@ -0,0 +1,70 @@
import copy

import pytest
import spacy
from confection import Config # type: ignore[import]
from thinc.compat import has_torch_cuda_gpu

from ...compat import torch

_PIPE_CFG = {
"model": {
"@llm_models": "spacy.Mixtral.v1",
"name": "Mixtral-8x7B-Instruct-v0.1",
},
"task": {"@llm_tasks": "spacy.NoOp.v1"},
}

_NLP_CONFIG = """

[nlp]
lang = "en"
pipeline = ["llm"]
batch_size = 128

[components]

[components.llm]
factory = "llm"

[components.llm.task]
@llm_tasks = "spacy.NoOp.v1"

[components.llm.model]
@llm_models = "spacy.Mixtral.v1"
name = "Mixtral-8x7B-Instruct-v0.1.1"
"""


@pytest.mark.gpu
@pytest.mark.skip(reason="CI runner needs more GPU memory")
@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA")
def test_init():
"""Test initialization and simple run."""
nlp = spacy.blank("en")
cfg = copy.deepcopy(_PIPE_CFG)
nlp.add_pipe("llm", config=cfg)
nlp("This is a test.")
torch.cuda.empty_cache()


@pytest.mark.gpu
@pytest.mark.skip(reason="CI runner needs more GPU memory")
@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA")
def test_init_from_config():
orig_config = Config().from_str(_NLP_CONFIG)
nlp = spacy.util.load_model_from_config(orig_config, auto_fill=True)
assert nlp.pipe_names == ["llm"]
torch.cuda.empty_cache()


@pytest.mark.gpu
@pytest.mark.skip(reason="CI runner needs more GPU memory")
@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA")
def test_invalid_model():
orig_config = Config().from_str(_NLP_CONFIG)
config = copy.deepcopy(orig_config)
config["components"]["llm"]["model"]["name"] = "x"
with pytest.raises(ValueError, match="unexpected value; permitted"):
spacy.util.load_model_from_config(config, auto_fill=True)
torch.cuda.empty_cache()