diff --git a/README.md b/README.md
index 9a4976b4..a96fa3b8 100644
--- a/README.md
+++ b/README.md
@@ -58,7 +58,7 @@ client = Together()
 
 # Simple text message
 response = client.chat.completions.create(
-    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
+    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
     messages=[{"role": "user", "content": "tell me about new york"}],
 )
 print(response.choices[0].message.content)
@@ -148,7 +148,7 @@ from together import Together
 
 client = Together()
 stream = client.chat.completions.create(
-    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
+    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
     messages=[{"role": "user", "content": "tell me about new york"}],
     stream=True,
 )
@@ -173,7 +173,7 @@ async def async_chat_completion(messages):
     async_client = AsyncTogether()
     tasks = [
         async_client.chat.completions.create(
-            model="mistralai/Mixtral-8x7B-Instruct-v0.1",
+            model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
             messages=[{"role": "user", "content": message}],
         )
         for message in messages
@@ -196,7 +196,7 @@ from together import Together
 
 client = Together()
 response = client.chat.completions.create(
-    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
+    model="meta-llama/Llama-3.2-3B-Instruct-Turbo",
     messages=[{"role": "user", "content": "tell me about new york"}],
     logprobs=1
 )
@@ -347,7 +347,7 @@ client.files.delete(id="file-d0d318cb-b7d9-493a-bd70-1cfe089d3815") # deletes a
 
 ### Fine-tunes
 
-The finetune API is used for fine-tuning and allows developers to create finetuning jobs. It also has several methods to list all jobs, retrive statuses and get checkpoints. Please refer to our fine-tuning docs [here](https://docs.together.ai/docs/fine-tuning-python).
+The fine-tuning API allows developers to create fine-tuning jobs. It also has several methods to list all jobs, retrieve statuses, and get checkpoints. Please refer to our fine-tuning docs [here](https://docs.together.ai/docs/fine-tuning-quickstart).
 
 ```python
 from together import Together
@@ -356,7 +356,7 @@ client = Together()
 
 client.fine_tuning.create(
     training_file = 'file-d0d318cb-b7d9-493a-bd70-1cfe089d3815',
-    model = 'mistralai/Mixtral-8x7B-Instruct-v0.1',
+    model = 'meta-llama/Llama-3.2-3B-Instruct',
     n_epochs = 3,
     n_checkpoints = 1,
     batch_size = "max",
@@ -394,7 +394,7 @@ for model in models:
 together chat.completions \
   --message "system" "You are a helpful assistant named Together" \
   --message "user" "What is your name?" \
-  --model mistralai/Mixtral-8x7B-Instruct-v0.1
+  --model meta-llama/Llama-4-Scout-17B-16E-Instruct
 ```
 
 The Chat Completions CLI enables streaming tokens to stdout by default. To disable streaming, use `--no-stream`.
@@ -404,7 +404,7 @@ The Chat Completions CLI enables streaming tokens to stdout by default. To disab
 
 ```bash
 together completions \
   "Large language models are " \
-  --model mistralai/Mixtral-8x7B-v0.1 \
+  --model meta-llama/Llama-4-Scout-17B-16E-Instruct \
   --max-tokens 512 \
   --stop "."
 ```
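The README hunks above only swap one model ID for another. A change like this is easy to sanity-check against the live catalog before committing: the sketch below is not part of the diff, and assumes only the `client.models.list()` call already referenced in the README context above (`for model in models:`).

```python
# Not part of this diff: a minimal sketch that checks whether the model ID
# being written into the docs is actually served, using the models listing
# shown in the README. Assumes TOGETHER_API_KEY is set in the environment.
from together import Together

client = Together()

candidate = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
served = {model.id for model in client.models.list()}

print(f"{candidate}: {'available' if candidate in served else 'NOT available'}")
```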
diff --git a/src/together/cli/api/endpoints.py b/src/together/cli/api/endpoints.py
index f9634f9c..3b1da3f4 100644
--- a/src/together/cli/api/endpoints.py
+++ b/src/together/cli/api/endpoints.py
@@ -82,7 +82,7 @@ def endpoints(ctx: click.Context) -> None:
 @click.option(
     "--model",
     required=True,
-    help="The model to deploy (e.g. mistralai/Mixtral-8x7B-Instruct-v0.1)",
+    help="The model to deploy (e.g. meta-llama/Llama-4-Scout-17B-16E-Instruct)",
 )
 @click.option(
     "--min-replicas",
diff --git a/tests/integration/resources/test_completion_stream.py b/tests/integration/resources/test_completion_stream.py
index de86f0ae..588cbe6d 100644
--- a/tests/integration/resources/test_completion_stream.py
+++ b/tests/integration/resources/test_completion_stream.py
@@ -35,7 +35,7 @@ def test_create(
         random_repetition_penalty,  # noqa
     ) -> None:
         prompt = "The space robots have"
-        model = "mistralai/Mixtral-8x7B-v0.1"
+        model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
         stop = [""]
 
         # max_tokens should be a reasonable number for this test
@@ -69,10 +69,12 @@ def test_create(
             assert isinstance(chunk.id, str)
             assert isinstance(chunk.created, int)
             assert isinstance(chunk.object, ObjectType)
-            assert isinstance(chunk.choices[0], CompletionChoicesChunk)
-            assert isinstance(chunk.choices[0].index, int)
-            assert isinstance(chunk.choices[0].delta, DeltaContent)
-            assert isinstance(chunk.choices[0].delta.content, str)
+
+            if chunk.choices:
+                assert isinstance(chunk.choices[0], CompletionChoicesChunk)
+                assert isinstance(chunk.choices[0].index, int)
+                assert isinstance(chunk.choices[0].delta, DeltaContent)
+                assert isinstance(chunk.choices[0].delta.content, str)
 
             usage = chunk.usage
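For context on the last hunk: a streamed completion can end with a chunk that carries only usage accounting and an empty `choices` list, which is why the per-chunk assertions are now guarded by `if chunk.choices:`. Application code consuming a stream should take the same precaution. Below is a minimal sketch mirroring the test's setup; it is not part of this diff, and the `total_tokens` field on the usage object is an assumption not shown above.

```python
# Minimal sketch of defensive stream consumption, mirroring the guarded
# assertions in the updated test. Not part of this diff; assumes
# TOGETHER_API_KEY is set and that the usage object exposes total_tokens.
from together import Together

client = Together()

stream = client.completions.create(
    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
    prompt="The space robots have",
    max_tokens=128,
    stream=True,
)

for chunk in stream:
    # A usage-only chunk may arrive with an empty choices list, so guard
    # before indexing -- the same check the updated test performs.
    if chunk.choices:
        print(chunk.choices[0].delta.content or "", end="", flush=True)
    if chunk.usage:
        print(f"\n[total tokens: {chunk.usage.total_tokens}]")
```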