From 4fee761e3d12269726bf1c9e9ee30d31ffddf83d Mon Sep 17 00:00:00 2001
From: Bola Malek <bola@baseten.co>
Date: Thu, 18 Apr 2024 14:30:35 -0700
Subject: [PATCH 1/2] DistilBERT

---
 distilbert/README.md         | 15 +++++++++++++++
 distilbert/config.yaml       |  6 ++++++
 distilbert/model/__init__.py |  0
 distilbert/model/model.py    | 30 ++++++++++++++++++++++++++++++
 distilbert/requirements.txt  |  3 +++
 5 files changed, 54 insertions(+)
 create mode 100644 distilbert/README.md
 create mode 100644 distilbert/config.yaml
 create mode 100644 distilbert/model/__init__.py
 create mode 100644 distilbert/model/model.py
 create mode 100644 distilbert/requirements.txt

diff --git a/distilbert/README.md b/distilbert/README.md
new file mode 100644
index 000000000..d536ff448
--- /dev/null
+++ b/distilbert/README.md
@@ -0,0 +1,15 @@
+# DistilBERT
+This truss runs the [DistilBERT](https://huggingface.co/docs/transformers/en/model_doc/distilbert) model as an endpoint on Baseten.
+
+## Deploy
+```
+pip install --upgrade truss
+truss push --publish # grab an api key from https://app.baseten.co/settings/api_keys
+```
+
+The deployment will take a few minutes the first time. Once it's ready in the UI, you can proceed to calling the API.
+
+## Test
+```
+truss predict --published -d '{"text": "some text to embed"}'
+```
\ No newline at end of file
diff --git a/distilbert/config.yaml b/distilbert/config.yaml
new file mode 100644
index 000000000..4f71a1748
--- /dev/null
+++ b/distilbert/config.yaml
@@ -0,0 +1,6 @@
+
+model_name: DistilBert
+python_version: py310
+requirements_file: ./requirements.txt
+resources:
+  accelerator: T4
\ No newline at end of file
diff --git a/distilbert/model/__init__.py b/distilbert/model/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/distilbert/model/model.py b/distilbert/model/model.py
new file mode 100644
index 000000000..e6f0dc4e1
--- /dev/null
+++ b/distilbert/model/model.py
@@ -0,0 +1,30 @@
+import torch
+from transformers import AutoTokenizer, AutoModel
+
+
+class Model:
+    def __init__(self, **kwargs):
+        self._model = None  # populated by load()
+
+    def load(self):
+        # Load model here and assign to self._model.
+        self.device = (
+            "cuda" if torch.cuda.is_available() else "mps"
+        )  # the device to load the model onto
+
+        self._tokenizer = AutoTokenizer.from_pretrained(
+            "distilbert/distilbert-base-uncased"
+        )
+        self._model = AutoModel.from_pretrained(
+            "distilbert/distilbert-base-uncased",
+            torch_dtype=torch.float16,  # load weights in half precision
+        ).to(self.device)
+
+    def predict(self, model_input):
+        """Return the last hidden state for model_input["text"] as nested lists."""
+        text = model_input.get("text")
+        encoded_input = self._tokenizer(text, return_tensors="pt").to(self.device)
+
+        # Inference only: disable autograd so no graph/activations are retained.
+        with torch.no_grad():
+            return self._model(**encoded_input).last_hidden_state.tolist()
diff --git a/distilbert/requirements.txt b/distilbert/requirements.txt
new file mode 100644
index 000000000..c42de737b
--- /dev/null
+++ b/distilbert/requirements.txt
@@ -0,0 +1,3 @@
+hf-transfer==0.1.6
+torch==2.2.2
+transformers==4.40.0

From e6d3a06a03b79a64bf27be2a8298341fab915cf1 Mon Sep 17 00:00:00 2001
From: Bola Malek <bola@baseten.co>
Date: Thu, 18 Apr 2024 14:34:39 -0700
Subject: [PATCH 2/2] lint

---
 distilbert/config.yaml    | 2 +-
 distilbert/model/model.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/distilbert/config.yaml b/distilbert/config.yaml
index 4f71a1748..3f15fbd8d 100644
--- a/distilbert/config.yaml
+++ b/distilbert/config.yaml
@@ -3,4 +3,4 @@ model_name: DistilBert
 python_version: py310
 requirements_file: ./requirements.txt
 resources:
-  accelerator: T4
\ No newline at end of file
+  accelerator: T4
diff --git a/distilbert/model/model.py b/distilbert/model/model.py
index e6f0dc4e1..ede40ef2f 100644
--- a/distilbert/model/model.py
+++ b/distilbert/model/model.py
@@ -9,7 +9,7 @@ def __init__(self, **kwargs):
     def load(self):
         # Load model here and assign to self._model.
         self.device = (
-            "cuda" if torch.cuda.is_available() else "mps"
+            "cuda" if torch.cuda.is_available() else "cpu"
         )  # the device to load the model onto
 
         self._tokenizer = AutoTokenizer.from_pretrained(