4 changes: 4 additions & 0 deletions spacy_llm/models/hf/__init__.py
@@ -3,7 +3,9 @@
from .falcon import falcon_hf
from .llama2 import llama2_hf
from .mistral import mistral_hf
from .mixtral import mixtral_hf
from .openllama import openllama_hf
from .phi2 import phi2_hf
from .stablelm import stablelm_hf

__all__ = [
@@ -12,6 +14,8 @@
"falcon_hf",
"llama2_hf",
"mistral_hf",
"mixtral_hf",
"openllama_hf",
"phi2_hf",
"stablelm_hf",
]
8 changes: 8 additions & 0 deletions spacy_llm/models/hf/base.py
@@ -69,6 +69,14 @@ def __init__(
f"Double-check you specified a valid dtype."
) from ex

        # Coerce stringified booleans ("True"/"False") in the configs to real bools.
        for key, value in self._config_init.items():
            if value in ("True", "False"):
                self._config_init[key] = value == "True"
        for key, value in self._config_run.items():
            if value in ("True", "False"):
                self._config_run[key] = value == "True"

# Init HF model.
HuggingFace.check_installation()
self._check_model()
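The new block above coerces stringified booleans coming in through `config_init`/`config_run` into real bools before they reach Hugging Face. A minimal sketch of the effect, using a plain dict in place of `self._config_init` (the keys are illustrative HF options, not values this change requires):

config_init = {"trust_remote_code": "True", "low_cpu_mem_usage": "False", "torch_dtype": "auto"}
for key, value in config_init.items():
    # Only exact "True"/"False" strings are coerced; other values pass through untouched.
    if value in ("True", "False"):
        config_init[key] = value == "True"
assert config_init == {"trust_remote_code": True, "low_cpu_mem_usage": False, "torch_dtype": "auto"}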
7 changes: 3 additions & 4 deletions spacy_llm/models/hf/mistral.py
@@ -65,7 +65,7 @@ def __call__(self, prompts: Iterable[Iterable[str]]) -> Iterable[Iterable[str]]:

tokenized_input_ids = [
self._tokenizer(
prompt if not self._is_instruct else f"<s>[INST] {prompt} [/INST]",
prompt if not self._is_instruct else f"[INST] {prompt} [/INST]",
return_tensors="pt",
).input_ids
for prompt in prompts_for_doc
@@ -96,11 +96,10 @@ def mistral_hf(
config_run: Optional[Dict[str, Any]] = SimpleFrozenDict(),
) -> Callable[[Iterable[Iterable[str]]], Iterable[Iterable[str]]]:
"""Generates Mistral instance that can execute a set of prompts and return the raw responses.
name (Literal): Name of the Falcon model. Has to be one of Falcon.get_model_names().
    name (Literal): Name of the Mistral model. Has to be one of Mistral.get_model_names().
config_init (Optional[Dict[str, Any]]): HF config for initializing the model.
config_run (Optional[Dict[str, Any]]): HF config for running the model.
RETURNS (Callable[[Iterable[str]], Iterable[str]]): Falcon instance that can execute a set of prompts and return
the raw responses.
RETURNS (Mistral): Mistral instance that can execute a set of prompts and return the raw responses.
"""
return Mistral(
name=name, config_init=config_init, config_run=config_run, context_length=8000
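Context for dropping the literal `<s>`: the Mistral tokenizer already prepends a BOS token when encoding, so hard-coding it in the prompt string would add it twice. A quick check, as a sketch only; it assumes access to the `mistralai/Mistral-7B-Instruct-v0.1` tokenizer with its default `add_bos_token=True` setting:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
ids_with_tag = tok("<s>[INST] Hi [/INST]").input_ids
ids_without_tag = tok("[INST] Hi [/INST]").input_ids
# Expectation: the first list starts with two BOS ids, the second with one.
print(tok.bos_token_id, ids_with_tag[:3], ids_without_tag[:3])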
108 changes: 108 additions & 0 deletions spacy_llm/models/hf/mixtral.py
@@ -0,0 +1,108 @@
from typing import Any, Callable, Dict, Iterable, List, Optional

from confection import SimpleFrozenDict

from ...compat import Literal, transformers
from ...registry.util import registry
from .base import HuggingFace


class Mixtral(HuggingFace):
MODEL_NAMES = Literal[
"Mixtral-8x7B-v0.1", "Mixtral-8x7B-Instruct-v0.1"
] # noqa: F722

def __init__(
self,
name: MODEL_NAMES,
config_init: Optional[Dict[str, Any]],
config_run: Optional[Dict[str, Any]],
context_length: Optional[int],
):
self._tokenizer: Optional["transformers.AutoTokenizer"] = None
self._is_instruct = "instruct" in name
super().__init__(
name=name,
config_init=config_init,
config_run=config_run,
context_length=context_length,
)

assert isinstance(self._tokenizer, transformers.PreTrainedTokenizerBase)

# Instantiate GenerationConfig object from config dict.
self._hf_config_run = transformers.GenerationConfig.from_pretrained(
self._name, **self._config_run
)
# To avoid deprecation warning regarding usage of `max_length`.
self._hf_config_run.max_new_tokens = self._hf_config_run.max_length

def init_model(self) -> Any:
self._tokenizer = transformers.AutoTokenizer.from_pretrained(self._name)
init_cfg = self._config_init
device: Optional[str] = None
if "device" in init_cfg:
device = init_cfg.pop("device")

model = transformers.AutoModelForCausalLM.from_pretrained(
self._name, **init_cfg, resume_download=True
)
if device:
model.to(device)

return model

@property
def hf_account(self) -> str:
return "mistralai"

def __call__(self, prompts: Iterable[Iterable[str]]) -> Iterable[Iterable[str]]: # type: ignore[override]
assert callable(self._tokenizer)
assert hasattr(self._model, "generate")
assert hasattr(self._tokenizer, "batch_decode")
responses: List[List[str]] = []

for prompts_for_doc in prompts:
prompts_for_doc = list(prompts_for_doc)

tokenized_input_ids = [
self._tokenizer(
prompt if not self._is_instruct else f"[INST] {prompt} [/INST]",
return_tensors="pt",
).input_ids
for prompt in prompts_for_doc
]
tokenized_input_ids = [
tp.to(self._model.device) for tp in tokenized_input_ids
]

responses.append(
[
self._tokenizer.decode(
self._model.generate(
input_ids=tok_ii, generation_config=self._hf_config_run
)[:, tok_ii.shape[1] :][0],
skip_special_tokens=True,
)
for tok_ii in tokenized_input_ids
]
)

return responses


@registry.llm_models("spacy.Mixtral.v1")
def mixtral_hf(
name: Mixtral.MODEL_NAMES,
config_init: Optional[Dict[str, Any]] = SimpleFrozenDict(),
config_run: Optional[Dict[str, Any]] = SimpleFrozenDict(),
) -> Callable[[Iterable[Iterable[str]]], Iterable[Iterable[str]]]:
"""Generates Mixtral instance that can execute a set of prompts and return the raw responses.
name (Literal): Name of the Mixtral model. Has to be one of Mixtral.get_model_names().
config_init (Optional[Dict[str, Any]]): HF config for initializing the model.
config_run (Optional[Dict[str, Any]]): HF config for running the model.
RETURNS (Mixtral): Mixtral instance that can execute a set of prompts and return the raw responses.
"""
return Mixtral(
name=name, config_init=config_init, config_run=config_run, context_length=8000
)
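The registered factory returns a callable that takes prompts grouped per doc (an iterable of iterables) and returns responses with the same nesting. A minimal usage sketch, assuming the package is installed with its transformers extras and a GPU with enough memory for the 8x7B weights; the `device_map="auto"` init option is illustrative and simply passes through to `from_pretrained`:

from spacy_llm.models.hf import mixtral_hf

# One inner list per doc; each inner list holds the prompts for that doc.
mixtral = mixtral_hf(
    name="Mixtral-8x7B-Instruct-v0.1",
    config_init={"device_map": "auto"},
)
responses = mixtral([["What is the capital of France?"]])
print(list(responses)[0][0])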
115 changes: 115 additions & 0 deletions spacy_llm/models/hf/phi2.py
@@ -0,0 +1,115 @@
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple

from confection import SimpleFrozenDict

from ...compat import Literal, transformers
from ...registry.util import registry
from .base import HuggingFace


class Phi2(HuggingFace):
MODEL_NAMES = Literal["phi-2"] # noqa: F722

def __init__(
self,
name: str,
config_init: Optional[Dict[str, Any]],
config_run: Optional[Dict[str, Any]],
context_length: Optional[int],
):
self._tokenizer: Optional["transformers.AutoTokenizer"] = None
super().__init__(
name=name,
config_init=config_init,
config_run=config_run,
context_length=context_length,
)

def init_model(self) -> "transformers.AutoModelForCausalLM":
"""Sets up HF model and needed utilities.
RETURNS (Any): HF model.
"""
# Initialize tokenizer and model.
self._tokenizer = transformers.AutoTokenizer.from_pretrained(
self._name, trust_remote_code=True
)
init_cfg = self._config_init
device: Optional[str] = None
if "device" in init_cfg:
device = init_cfg.pop("device")

model = transformers.AutoModelForCausalLM.from_pretrained(
self._name, **init_cfg
)
if device:
model.to(device)

return model

def __call__(self, prompts: Iterable[Iterable[str]]) -> Iterable[Iterable[str]]: # type: ignore[override]
assert callable(self._tokenizer)
responses: List[List[str]] = []

for prompts_for_doc in prompts:
tokenized_input_ids = [
self._tokenizer(
prompt, return_tensors="pt", return_attention_mask=False
).input_ids
for prompt in prompts_for_doc
]
tokenized_input_ids = [
tii.to(self._model.device) for tii in tokenized_input_ids
]

assert hasattr(self._model, "generate")
responses.append(
[
self._tokenizer.decode(
self._model.generate(input_ids=tii, **self._config_run)[
:, tii.shape[1] :
][0],
)
for tii in tokenized_input_ids
]
)

return responses

@property
def hf_account(self) -> str:
return "microsoft"

@staticmethod
def compile_default_configs() -> Tuple[Dict[str, Any], Dict[str, Any]]:
# See https://huggingface.co/microsoft/phi-2#sample-code for recommended setting combinations.
default_cfg_init, default_cfg_run = HuggingFace.compile_default_configs()
return (
{
**default_cfg_init,
"torch_dtype": "auto",
"device_map": "cuda",
"trust_remote_code": True,
},
{
**default_cfg_run,
"max_new_tokens": 200,
},
)


@registry.llm_models("spacy.Phi-2.v1")
def phi2_hf(
name: Phi2.MODEL_NAMES,
config_init: Optional[Dict[str, Any]] = SimpleFrozenDict(),
config_run: Optional[Dict[str, Any]] = SimpleFrozenDict(),
) -> Callable[[Iterable[Iterable[str]]], Iterable[Iterable[str]]]:
"""Generates OpenLLaMA instance that can execute a set of prompts and return the raw responses.
name (Literal): Name of the OpenLLaMA model. Has to be one of OpenLLaMA.get_model_names().
config_init (Optional[Dict[str, Any]]): HF config for initializing the model.
config_run (Optional[Dict[str, Any]]): HF config for running the model.
RETURNS (Callable[[Iterable[str]], Iterable[str]]): OpenLLaMA instance that can execute a set of prompts and return
the raw responses.
"""
return Phi2(
name=name, config_init=config_init, config_run=config_run, context_length=2048
)
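A minimal usage sketch for the new `spacy.Phi-2.v1` registration, mirroring the Mixtral test below; it assumes spacy-llm is installed so the `llm` factory is available, and that a CUDA device is present, since the default init config maps the model to "cuda":

import spacy

nlp = spacy.blank("en")
nlp.add_pipe(
    "llm",
    config={
        "model": {"@llm_models": "spacy.Phi-2.v1", "name": "phi-2"},
        "task": {"@llm_tasks": "spacy.NoOp.v1"},
    },
)
doc = nlp("This is a test.")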
70 changes: 70 additions & 0 deletions spacy_llm/tests/models/test_mixtral.py
@@ -0,0 +1,70 @@
import copy

import pytest
import spacy
from confection import Config # type: ignore[import]
from thinc.compat import has_torch_cuda_gpu

from ...compat import torch

_PIPE_CFG = {
"model": {
"@llm_models": "spacy.Mixtral.v1",
"name": "Mixtral-8x7B-Instruct-v0.1",
},
"task": {"@llm_tasks": "spacy.NoOp.v1"},
}

_NLP_CONFIG = """

[nlp]
lang = "en"
pipeline = ["llm"]
batch_size = 128

[components]

[components.llm]
factory = "llm"

[components.llm.task]
@llm_tasks = "spacy.NoOp.v1"

[components.llm.model]
@llm_models = "spacy.Mixtral.v1"
name = "Mixtral-8x7B-Instruct-v0.1.1"
"""


@pytest.mark.gpu
@pytest.mark.skip(reason="CI runner needs more GPU memory")
@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA")
def test_init():
"""Test initialization and simple run."""
nlp = spacy.blank("en")
cfg = copy.deepcopy(_PIPE_CFG)
nlp.add_pipe("llm", config=cfg)
nlp("This is a test.")
torch.cuda.empty_cache()


@pytest.mark.gpu
@pytest.mark.skip(reason="CI runner needs more GPU memory")
@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA")
def test_init_from_config():
orig_config = Config().from_str(_NLP_CONFIG)
nlp = spacy.util.load_model_from_config(orig_config, auto_fill=True)
assert nlp.pipe_names == ["llm"]
torch.cuda.empty_cache()


@pytest.mark.gpu
@pytest.mark.skip(reason="CI runner needs more GPU memory")
@pytest.mark.skipif(not has_torch_cuda_gpu, reason="needs GPU & CUDA")
def test_invalid_model():
orig_config = Config().from_str(_NLP_CONFIG)
config = copy.deepcopy(orig_config)
config["components"]["llm"]["model"]["name"] = "x"
with pytest.raises(ValueError, match="unexpected value; permitted"):
spacy.util.load_model_from_config(config, auto_fill=True)
torch.cuda.empty_cache()