diff --git a/pages/generative-apis/how-to/query-vision-models.mdx b/pages/generative-apis/how-to/query-vision-models.mdx
index 1bac3e7330..dd123d1602 100644
--- a/pages/generative-apis/how-to/query-vision-models.mdx
+++ b/pages/generative-apis/how-to/query-vision-models.mdx
@@ -79,9 +79,9 @@ client = OpenAI(
 You can now create a chat completion:
 
 ```python
-# Create a chat completion using the 'pixtral-12b-2409' model
+# Create a chat completion using the 'mistral-small-3.2-24b-instruct-2506' model
 response = client.chat.completions.create(
-    model="pixtral-12b-2409",
+    model="mistral-small-3.2-24b-instruct-2506",
     messages=[
         {
             "role": "user",
@@ -109,57 +109,43 @@ A conversation style may include a default system prompt. You may set this promp
 [
     {
         "role": "system",
-        "content": "You are Xavier Niel."
+        "content": "You are an expert developer."
     }
 ]
 ```
 
-### Passing images to Pixtral
+### Passing images to the vision model
 
 1. **Image URLs**: If the image is available online, you can just include the image URL in your request as demonstrated above. This approach is simple and does not require any encoding.
 2. **Base64 encoded**: image Base64 encoding is a standard way to transform binary data, like images, into a text format, making it easier to transmit over the internet.
 
-To encode Base64 images in Python, you first need to install `Pillow` library:
-
-```bash
-pip install pillow
-```
-
-Then, the following Python code sample shows you how to encode an image in Base64 format and pass it to a request payload for the Chat Completions API:
+The following code sample shows how to encode an image in Base64 format and pass it in the request payload to the Chat Completions API. You can try it with this [example image](https://genapi-documentation-assets.s3.fr-par.scw.cloud/scaleway-illustration-robot.jpg).
 
 ```python
 import base64
-from io import BytesIO
-from PIL import Image
-
-def encode_image(img):
-    buffered = BytesIO()
-    img.save(buffered, format="JPEG")
-    encoded_string = base64.b64encode(buffered.getvalue()).decode("utf-8")
-    return encoded_string
-
-img = Image.open("path_to_your_image.jpg")
-base64_img = encode_image(img)
-
-payload = {
-    "messages": [
-        {
-            "role": "user",
-            "content": [
-                {
-                    "type": "text",
-                    "text": "What is this image?"
-                },
-                {
-                    "type": "image_url",
-                    "image_url": {
-                        "url": f"data:image/jpeg;base64,{base64_img}"
-                    }
-                }
-            ]
-        }
-    ]
-}
+
+with open("path/to/your/image.jpg", "rb") as file:
+    image_content = file.read()
+    encoded_image = base64.b64encode(image_content).decode("utf-8")
+
+response = client.chat.completions.create(
+    model="mistral-small-3.2-24b-instruct-2506",
+    messages=[
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "What is this image?"},
+                {
+                    "type": "image_url",
+                    "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"},
+                },
+            ]
+        }
+    ],
+    temperature=0.7,
+    max_tokens=2048,
+    top_p=0.9
+)
 ```
 
 
@@ -191,15 +177,15 @@ client = OpenAI(
     api_key="" # Your unique API key from Scaleway
 )
 response = client.chat.completions.create(
-model="pixtral-12b-2409",
-messages=[{
-    "role": "user",
-    "content": [
-        {"type": "text", "text": "What is this image?"},
-        {"type": "image_url", "image_url": {"url": "https://picsum.photos/id/32/512/512"}},
-    ]
-}],
-stream=True,
+    model="mistral-small-3.2-24b-instruct-2506",
+    messages=[{
+        "role": "user",
+        "content": [
+            {"type": "text", "text": "What is this image?"},
+            {"type": "image_url", "image_url": {"url": "https://picsum.photos/id/32/512/512"}},
+        ]
+    }],
+    stream=True,
 )
 
 for chunk in response:
@@ -224,7 +210,7 @@ client = AsyncOpenAI(
 
 async def main():
     stream = await client.chat.completions.create(
-        model="pixtral-12b-2409",
+        model="mistral-small-3.2-24b-instruct-2506",
         messages=[{
             "role": "user",
             "content": [
@@ -244,7 +230,26 @@ asyncio.run(main())
 
 ## Frequently Asked Questions
 #### Is there a limit to the size of each image?
-The only limitation is in context window (1 token for each 16x16 pixel).
+The only limitations are the token context window and the maximum resolution supported by the model (images are automatically downscaled to fit within the maximum resolution). Refer to our [model catalog](/managed-inference/reference-content/model-catalog/#mistral-small-31-24b-instruct-2503) for more information about supported formats and token dimensions for each model.
 
 #### What is the maximum amount of images per conversation?
 Each conversation can handle up to 12 images (per request). Attempting to add a 13th image will result in a 400 Bad Request error.
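+
+To pass several images in a single request, add one `image_url` entry per image to the `content` array. The following minimal sketch reuses the `client` configured above; the second image URL is only an illustrative placeholder.
+
+```python
+# Send two images in one request by stacking image_url entries in the content array
+response = client.chat.completions.create(
+    model="mistral-small-3.2-24b-instruct-2506",
+    messages=[{
+        "role": "user",
+        "content": [
+            {"type": "text", "text": "Compare these two images."},
+            {"type": "image_url", "image_url": {"url": "https://picsum.photos/id/32/512/512"}},
+            {"type": "image_url", "image_url": {"url": "https://picsum.photos/id/33/512/512"}},  # illustrative placeholder
+        ]
+    }],
+)
+# Print the text of the model's reply
+print(response.choices[0].message.content)
+```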