Skip to content

Commit 8011390

Browse files
committed
add chat template deployment
1 parent 9367514 commit 8011390

File tree

1 file changed

+57
-0
lines changed
  • 11-embeddings-reranker-classification-tensorrt/BEI-skywork-skywork-reward-llama-3.1-8b-v0.2-reward-model-fp8

1 file changed

+57
-0
lines changed
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
"""Example script showing how to call the chat-template deployment."""
2+
3+
import os
4+
from functools import cache
5+
6+
import requests
7+
8+
# Type alias for a chat conversation: a list of message dicts. The example
# under ``__main__`` uses "role" and "content" keys in each dict.
MESSAGE_TYPE = list[dict]

# Authorization header sent with every request. The API key is read from the
# BASETEN_API_KEY environment variable at import time, so importing this
# module raises KeyError when the variable is not set.
HEADERS = {"Authorization": f"Api-Key {os.environ['BASETEN_API_KEY']}"}

# Identifier interpolated into the model endpoint hostname.
DEPLOYMENT_ID = "03y1d2v3"
11+
12+
13+
@cache
def get_tokenizer():
    """Download the tokenizer files and return the loaded tokenizer.

    The result is memoized with ``functools.cache``, so the download and the
    load run only on the first call within a process.
    """
    # Imported inside the function so these packages are only needed when a
    # tokenizer is actually requested.
    from huggingface_hub import snapshot_download
    from transformers import AutoTokenizer

    repo_id = "Skywork/Skywork-Reward-Llama-3.1-8B-v0.2"
    target_dir = "/tmp/skywork-reward-llama-3.1-8b-v0.2"

    # Only *.json and *.txt files are fetched -- presumably enough for the
    # tokenizer without pulling the model weights (confirm against the repo).
    snapshot_download(
        repo_id,
        local_dir=target_dir,
        allow_patterns=["*.json", "*.txt"],
    )
    return AutoTokenizer.from_pretrained(target_dir)
25+
26+
27+
def apply_chat_template(messages: MESSAGE_TYPE) -> str:
    """Render ``messages`` through the tokenizer's chat template.

    ``tokenize=False`` is passed so the rendered text is returned instead of
    token ids.
    """
    return get_tokenizer().apply_chat_template(messages, tokenize=False)
31+
32+
33+
def send_to_deployment(messages: MESSAGE_TYPE, timeout: float = 120.0):
    """Apply the chat template to ``messages`` and POST it to the deployment.

    Args:
        messages: Conversation to send, as a list of message dicts.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            ``requests`` applies no timeout unless one is passed, so without
            it a stalled connection would block this call forever.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Exception: If the deployment responds with a status other than 200;
            the message carries the response text.
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    templated = apply_chat_template(messages)
    response = requests.post(
        url=f"https://model-{DEPLOYMENT_ID}.api.baseten.co/environments/production/sync/predict",
        headers=HEADERS,
        json={
            "inputs": templated,
            "raw_scores": True,
            "truncate": True,
            "truncation_direction": "Right",
        },
        timeout=timeout,
    )
    if response.status_code != 200:
        raise Exception(f"Failed to send to deployment: {response.text}")
    return response.json()
48+
49+
50+
if __name__ == "__main__":
    # Example multi-turn conversation used to demonstrate chat templating.
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello, how are you?"},
        {"role": "assistant", "content": "I'm doing great. How can I help you today?"},
        {"role": "user", "content": "I'd like to show off how chat templating works!"},
    ]
    # Print the response so that running the example shows the deployment's
    # output instead of silently discarding it.
    print(send_to_deployment(messages))

0 commit comments

Comments
 (0)