|
| 1 | +"""example script on how to use the chat template deployment""" |
| 2 | + |
| 3 | +import os |
| 4 | +from functools import cache |
| 5 | + |
| 6 | +import requests |
| 7 | + |
# A chat conversation: a list of message dicts (the example below uses
# "role" and "content" keys).
MESSAGE_TYPE = list[dict]
# Authorization header built from the BASETEN_API_KEY environment variable.
# The lookup happens at import time and raises KeyError if the variable is unset.
HEADERS = {"Authorization": f"Api-Key {os.environ['BASETEN_API_KEY']}"}
# Identifier interpolated into the model endpoint URL.
DEPLOYMENT_ID = "03y1d2v3"
| 11 | + |
| 12 | + |
@cache
def get_tokenizer():
    """Fetch the Skywork reward model's tokenizer files and load the tokenizer.

    Only files matching ``*.json`` and ``*.txt`` are requested from the Hub
    snapshot (presumably to skip the model weights). The function is wrapped
    in ``functools.cache``, so the download and load run once per process and
    later calls return the same tokenizer object.
    """
    # Imported inside the function, so these packages are only needed when a
    # tokenizer is actually requested.
    from huggingface_hub import snapshot_download
    from transformers import AutoTokenizer

    repo_id = "Skywork/Skywork-Reward-Llama-3.1-8B-v0.2"
    target_dir = "/tmp/skywork-reward-llama-3.1-8b-v0.2"
    wanted_files = ["*.json", "*.txt"]

    snapshot_download(repo_id, local_dir=target_dir, allow_patterns=wanted_files)
    tokenizer = AutoTokenizer.from_pretrained(target_dir)
    return tokenizer
| 25 | + |
| 26 | + |
def apply_chat_template(messages: MESSAGE_TYPE) -> str:
    """Render ``messages`` into one prompt string with the tokenizer's chat template.

    ``tokenize=False`` is passed so the tokenizer returns the templated text
    rather than token ids.
    """
    return get_tokenizer().apply_chat_template(messages, tokenize=False)
| 31 | + |
| 32 | + |
def send_to_deployment(messages: MESSAGE_TYPE, timeout: float = 60.0):
    """Apply the chat template to ``messages`` and POST the result to the deployment.

    Args:
        messages: Conversation as a list of message dicts.
        timeout: Seconds ``requests`` waits for the server before giving up.
            Without a timeout a stalled connection would block indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        RuntimeError: If the response status code is not 200.
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    templated = apply_chat_template(messages)
    response = requests.post(
        url=f"https://model-{DEPLOYMENT_ID}.api.baseten.co/environments/production/sync/predict",
        headers=HEADERS,
        json={
            "inputs": templated,
            "raw_scores": True,
            "truncate": True,
            "truncation_direction": "Right",
        },
        timeout=timeout,
    )
    if response.status_code != 200:
        # RuntimeError is still caught by callers that use `except Exception`.
        raise RuntimeError(f"Failed to send to deployment: {response.text}")
    return response.json()
| 48 | + |
| 49 | + |
if __name__ == "__main__":
    # Example conversation in the role/content format the chat template consumes.
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello, how are you?"},
        {"role": "assistant", "content": "I'm doing great. How can I help you today?"},
        {"role": "user", "content": "I'd like to show off how chat templating works!"},
    ]
    # Print the deployment's JSON response so running the example shows a result
    # instead of silently discarding it.
    print(send_to_deployment(messages))
0 commit comments