From 9c60d61e5a0ec6534d21c4a930a7dfb46189218d Mon Sep 17 00:00:00 2001
From: Droid
Date: Wed, 17 Apr 2024 15:12:08 +0000
Subject: [PATCH] Add DBRX-Instruct Truss example

Create a new dbrx-instruct directory with config.yaml, README.md, and
model.py, including model loading and prediction logic.
---
 dbrx-instruct/README.md         | 93 +++++++++++++++++++++++++++++++++
 dbrx-instruct/config.yaml       | 32 ++++++++++++
 dbrx-instruct/model/__init__.py |  1 +
 dbrx-instruct/model/model.py    | 50 ++++++++++++++++++
 4 files changed, 176 insertions(+)
 create mode 100644 dbrx-instruct/README.md
 create mode 100644 dbrx-instruct/config.yaml
 create mode 100644 dbrx-instruct/model/__init__.py
 create mode 100644 dbrx-instruct/model/model.py

diff --git a/dbrx-instruct/README.md b/dbrx-instruct/README.md
new file mode 100644
index 000000000..5f72ac28b
--- /dev/null
+++ b/dbrx-instruct/README.md
@@ -0,0 +1,93 @@

# DBRX-Instruct Truss

DBRX-Instruct is an open, state-of-the-art language model developed by Databricks. It uses a fine-grained mixture-of-experts (MoE) architecture with 132B total parameters, of which 36B are active on any given input, and is instruction-finetuned to follow user prompts. The model is adept at understanding and generating human-like text, making it a strong fit for a wide range of applications including content creation, summarization, and question answering.

## Deploying DBRX-Instruct Truss

To deploy the DBRX-Instruct Truss on Baseten:

1. Clone this repo: `git clone https://github.com/basetenlabs/truss-examples`

2. Make sure you have a [Baseten account](https://app.baseten.co/signup).

3. Install Truss (it is a Python package, not an npm package): `pip install --upgrade truss`

4. Authenticate with an [API key](https://docs.baseten.co/api_keys/) from your Baseten account when prompted.

5. Deploy: `cd dbrx-instruct && truss push` (you may be prompted to confirm a redeploy if you've deployed this model previously).

## Hardware

DBRX-Instruct has 132B parameters, which require roughly 264GB of GPU memory in 16-bit precision. To deploy it you'll need the following resources in the Baseten cloud:

- 4 CPUs
- 64GB RAM
- 4 A100 80GB GPUs (or comparable H100s)

If your account does not have access to A100s, you can modify the `resources` section of `config.yaml` to use a different accelerator with sufficient total GPU memory, as shown in the sketch below.
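The exact accelerator names and counts available to you depend on your Baseten plan, so treat the values in this excerpt as placeholders rather than a definitive recommendation:

```yaml
# Hypothetical override in dbrx-instruct/config.yaml: swap the accelerator
# for one your account supports, keeping ~264GB of total GPU memory.
resources:
  cpu: "4"
  memory: 64Gi
  use_gpu: true
  accelerator: H100:4
```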
## API

The DBRX-Instruct Truss has a single predict route that accepts a JSON payload with the following parameters:

| Parameter     | Type              | Description                                                                                                  |
|---------------|-------------------|--------------------------------------------------------------------------------------------------------------|
| `prompt`      | string (required) | The prompt to use for generating text.                                                                        |
| `max_tokens`  | integer           | The maximum number of new tokens to generate. Defaults to 512.                                                |
| `temperature` | float             | Controls the randomness of the generated text; higher values (e.g. 1.0) produce more diverse outputs. Defaults to 0.5. |

Example payload:

```json
{
  "prompt": "Write a haiku about mixture-of-experts models.",
  "max_tokens": 128,
  "temperature": 0.8
}
```

## Example Usage

You can invoke the model via Baseten's REST API (replace `{model_id}` with your model's ID and `YOUR_API_KEY` with a Baseten API key):

```bash
curl -X POST https://model-{model_id}.api.baseten.co/production/predict \
  -H 'Authorization: Api-Key YOUR_API_KEY' \
  -H 'Content-Type: application/json' \
  -d '{
    "prompt": "Write a haiku about mixture-of-experts models.",
    "max_tokens": 128,
    "temperature": 0.8
  }'
```

Or using the Baseten Python client:

```python
import baseten

# Look up the deployed model by its ID
model = baseten.deployed_model_id("your-deployed-model-id")

# Make a prediction; the payload mirrors the REST API
response = model.predict(
    {
        "prompt": "Write a haiku about mixture-of-experts models.",
        "max_tokens": 128,
        "temperature": 0.8,
    }
)
print(response)
```

## Generation Parameters and Limitations

DBRX-Instruct lets you customize generation through the `max_tokens` and `temperature` parameters, which control the length and randomness of the output respectively. Keep in mind that a significantly larger `max_tokens` increases response time and compute cost, and a higher `temperature` yields more varied and creative output but also raises the risk of off-topic or nonsensical text.
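To make the temperature trade-off concrete, here is a minimal sketch that requests the same completion at a low and a high temperature via the REST endpoint above; the model ID and API key are placeholders you would fill in from your own deployment:

```python
import requests

URL = "https://model-{model_id}.api.baseten.co/production/predict"  # your model ID
HEADERS = {"Authorization": "Api-Key YOUR_API_KEY"}  # your Baseten API key

for temperature in (0.1, 1.0):
    response = requests.post(
        URL,
        headers=HEADERS,
        json={
            "prompt": "Summarize the benefits of mixture-of-experts models in two sentences.",
            "max_tokens": 128,
            "temperature": temperature,
        },
    )
    # Lower temperatures yield more deterministic, focused completions;
    # higher temperatures yield more varied (and riskier) ones.
    print(f"temperature={temperature}: {response.json()['generated_text']}")
```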
## Optimal Use Cases

DBRX-Instruct excels in scenarios requiring nuanced understanding and generation of text, such as:

- Generating high-quality, contextually relevant content for articles or blogs.
- Summarizing long documents or articles into concise paragraphs.
- Answering questions based on provided context or knowledge.

For best results, provide clear, context-rich prompts and experiment with the generation parameters to find the optimal settings for your specific use case.

diff --git a/dbrx-instruct/config.yaml b/dbrx-instruct/config.yaml
new file mode 100644
index 000000000..75e3c52d3
--- /dev/null
+++ b/dbrx-instruct/config.yaml
@@ -0,0 +1,32 @@

# Metadata
model_name: DBRX-Instruct
model_metadata:
  example_model_input:
    prompt: What is a mixture-of-experts language model?
  tags:
    - text-generation
    - instruction-following

# Runtime config
python_version: py311
system_packages: []
requirements:
  - torch
  - transformers
  - accelerate
  - tiktoken

# Resources
resources:
  cpu: "4"
  memory: 64Gi
  use_gpu: true
  accelerator: A100:4

# The DBRX checkpoint is gated on Hugging Face, so an access token is
# supplied as a Baseten secret rather than hardcoded in the config
secrets:
  hf_access_token: null

# Environment variables
environment_variables: {}

external_package_dirs: []

diff --git a/dbrx-instruct/model/__init__.py b/dbrx-instruct/model/__init__.py
new file mode 100644
index 000000000..932b79829
--- /dev/null
+++ b/dbrx-instruct/model/__init__.py
@@ -0,0 +1 @@

# Empty file

diff --git a/dbrx-instruct/model/model.py b/dbrx-instruct/model/model.py
new file mode 100644
index 000000000..fc0efe53c
--- /dev/null
+++ b/dbrx-instruct/model/model.py
@@ -0,0 +1,50 @@

from typing import Dict

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

CHECKPOINT = "databricks/dbrx-instruct"


class Model:
    def __init__(self, **kwargs):
        self.model = None
        self.tokenizer = None
        # The checkpoint is gated on Hugging Face; Truss injects the token via secrets
        self.hf_access_token = kwargs["secrets"]["hf_access_token"]

    def load(self):
        # DBRX ships custom modeling code, so trust_remote_code=True is required
        self.tokenizer = AutoTokenizer.from_pretrained(
            CHECKPOINT, trust_remote_code=True, token=self.hf_access_token
        )
        # device_map="auto" shards the 132B-parameter model across all available
        # GPUs; bfloat16 halves the memory footprint relative to float32
        self.model = AutoModelForCausalLM.from_pretrained(
            CHECKPOINT,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            trust_remote_code=True,
            token=self.hf_access_token,
        )

    def predict(self, model_input: Dict) -> Dict:
        # Extract the prompt and the documented generation parameters,
        # falling back to the defaults listed in the README
        prompt = model_input["prompt"]
        max_tokens = model_input.get("max_tokens", 512)
        temperature = model_input.get("temperature", 0.5)
        # DBRX-Instruct expects chat-formatted input, so wrap the prompt
        # with the model's chat template
        input_ids = self.tokenizer.apply_chat_template(
            [{"role": "user", "content": prompt}],
            add_generation_prompt=True,
            return_tensors="pt",
        ).to(self.model.device)
        generated_tokens = self.model.generate(
            input_ids,
            max_new_tokens=max_tokens,
            temperature=temperature,
            do_sample=temperature > 0,
            pad_token_id=self.tokenizer.eos_token_id,
        )
        # Decode only the newly generated tokens, skipping the echoed prompt
        generated_text = self.tokenizer.decode(
            generated_tokens[0][input_ids.shape[-1] :], skip_special_tokens=True
        )
        # Return the generated text in a dictionary
        return {"generated_text": generated_text}
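Before pushing to Baseten, you can sanity-check the `Model` class locally, assuming a machine with enough GPU memory to hold the weights. A hypothetical smoke test, run from inside the `dbrx-instruct` directory, might look like this; the token value is a placeholder for a real Hugging Face access token with access to the gated checkpoint:

```python
from model.model import Model

# The secrets dict mirrors what Baseten injects at runtime (placeholder token)
model = Model(secrets={"hf_access_token": "HF_TOKEN"})
model.load()  # downloads ~264GB of weights on first run

result = model.predict(
    {
        "prompt": "What is a mixture-of-experts model?",
        "max_tokens": 64,
        "temperature": 0.2,
    }
)
print(result["generated_text"])
```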