From 4fee761e3d12269726bf1c9e9ee30d31ffddf83d Mon Sep 17 00:00:00 2001 From: Bola Malek Date: Thu, 18 Apr 2024 14:30:35 -0700 Subject: [PATCH 1/2] DistilBERT --- distilbert/README.md | 15 +++++++++++++++ distilbert/config.yaml | 6 ++++++ distilbert/model/__init__.py | 0 distilbert/model/model.py | 30 ++++++++++++++++++++++++++++++ distilbert/requirements.txt | 3 +++ 5 files changed, 54 insertions(+) create mode 100644 distilbert/README.md create mode 100644 distilbert/config.yaml create mode 100644 distilbert/model/__init__.py create mode 100644 distilbert/model/model.py create mode 100644 distilbert/requirements.txt diff --git a/distilbert/README.md b/distilbert/README.md new file mode 100644 index 000000000..d536ff448 --- /dev/null +++ b/distilbert/README.md @@ -0,0 +1,15 @@ +# DistilBERT +This truss runs the [DistilBERT](https://huggingface.co/docs/transformers/en/model_doc/distilbert) model as an endpoint on Baseten. + +## Deploy +``` +pip install --upgrade truss +truss push --publish # grab an api key from https://app.baseten.co/settings/api_keys +``` + +The deployment will take a few minutes the first time. Once it's ready in the UI, you can proceed to calling the API. 
+ +## Test +``` +truss predict --published -d '{"text": "some text to embed"}' +``` \ No newline at end of file diff --git a/distilbert/config.yaml b/distilbert/config.yaml new file mode 100644 index 000000000..4f71a1748 --- /dev/null +++ b/distilbert/config.yaml @@ -0,0 +1,6 @@ + +model_name: DistilBert +python_version: py310 +requirements_file: ./requirements.txt +resources: + accelerator: T4 \ No newline at end of file diff --git a/distilbert/model/__init__.py b/distilbert/model/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/distilbert/model/model.py b/distilbert/model/model.py new file mode 100644 index 000000000..e6f0dc4e1 --- /dev/null +++ b/distilbert/model/model.py @@ -0,0 +1,30 @@ +import torch +from transformers import AutoTokenizer, AutoModel + + +class Model: + def __init__(self, **kwargs): + self._model = None + + def load(self): + # Load model here and assign to self._model. + self.device = ( + "cuda" if torch.cuda.is_available() else "mps" + ) # the device to load the model onto + + self._tokenizer = AutoTokenizer.from_pretrained( + "distilbert/distilbert-base-uncased", device=self.device + ) + self._model = AutoModel.from_pretrained( + "distilbert/distilbert-base-uncased", + torch_dtype=torch.float16, + ).to(self.device) + + def predict(self, model_input): + # Run model inference here + + text = model_input.get("text") + + encoded_input = self._tokenizer(text, return_tensors='pt').to(self.device) + + return self._model(**encoded_input).last_hidden_state.tolist() diff --git a/distilbert/requirements.txt b/distilbert/requirements.txt new file mode 100644 index 000000000..c42de737b --- /dev/null +++ b/distilbert/requirements.txt @@ -0,0 +1,3 @@ +hf-transfer==0.1.6 +torch==2.2.2 +transformers==4.40.0 From e6d3a06a03b79a64bf27be2a8298341fab915cf1 Mon Sep 17 00:00:00 2001 From: Bola Malek Date: Thu, 18 Apr 2024 14:34:39 -0700 Subject: [PATCH 2/2] lint --- distilbert/config.yaml | 2 +- distilbert/model/model.py | 2 +- 2 files 
changed, 2 insertions(+), 2 deletions(-) diff --git a/distilbert/config.yaml b/distilbert/config.yaml index 4f71a1748..3f15fbd8d 100644 --- a/distilbert/config.yaml +++ b/distilbert/config.yaml @@ -3,4 +3,4 @@ model_name: DistilBert python_version: py310 requirements_file: ./requirements.txt resources: - accelerator: T4 \ No newline at end of file + accelerator: T4 diff --git a/distilbert/model/model.py b/distilbert/model/model.py index e6f0dc4e1..ede40ef2f 100644 --- a/distilbert/model/model.py +++ b/distilbert/model/model.py @@ -9,7 +9,7 @@ def __init__(self, **kwargs): def load(self): # Load model here and assign to self._model. self.device = ( - "cuda" if torch.cuda.is_available() else "mps" + "cuda" if torch.cuda.is_available() else "cpu" ) # the device to load the model onto self._tokenizer = AutoTokenizer.from_pretrained(