Skip to content

Commit fb64e58

Browse files
authored
update to ungated model with chat template and newer vllm (#418)
1 parent 3fb446a commit fb64e58

File tree

2 files changed

+99
-6
lines changed

2 files changed

+99
-6
lines changed

custom-server/pixtral-12b/config.yaml

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Container image for the server; this change moves the pinned vLLM OpenAI image from v0.6.1.post1 to v0.7.3.
base_image:
2-
image: vllm/vllm-openai:v0.6.1.post1
2+
image: vllm/vllm-openai:v0.7.3
33
# Model metadata: source repo id and avatar URL.
model_metadata:
44
# NOTE(review): repo_id still names mistralai/Pixtral-12B-2409, while the new start command serves mistral-community/pixtral-12b — confirm this is intended.
repo_id: mistralai/Pixtral-12B-2409
55
avatar_url: https://cdn.baseten.co/production/static/explore/mistral_logo.png
@@ -26,18 +26,21 @@ model_metadata:
2626
"max_tokens": 512,
2727
"temperature": 0.5
2828
}
29+
# Tags introduced by this change (new lines 29-32 of the config).
tags:
30+
- openai-compatible
31+
- multimodal
32+
- text-generation
2933
# Server process settings: start command, health endpoints, predict route and port.
docker_server:
30-
# Previous command: exported HF_TOKEN from /secrets/hf_access_token and served mistralai/Pixtral-12B-2409 with --tokenizer_mode mistral.
start_command: sh -c "HF_TOKEN=$(cat /secrets/hf_access_token) vllm serve mistralai/Pixtral-12B-2409 --served-model-name pixtral --tokenizer_mode mistral --limit_mm_per_prompt 'image=4'"
34+
# New command: serves mistral-community/pixtral-12b under the name "pixtral" without an HF token, sets --max-model-len 65536, loads the chat template from /app/data/pixtral12b.jinja with the "string" content format, keeps the image=4 multimodal limit, and sets --gpu-memory-utilization 0.95.
start_command: sh -c "vllm serve mistral-community/pixtral-12b --served-model-name pixtral --max-model-len 65536 --chat-template /app/data/pixtral12b.jinja --chat-template-content-format string --limit_mm_per_prompt 'image=4' --gpu-memory-utilization 0.95"
3135
# Readiness and liveness both use the /health route.
readiness_endpoint: /health
3236
liveness_endpoint: /health
3337
# Predict requests are routed to the chat completions path on port 8000.
predict_endpoint: /v1/chat/completions
3438
server_port: 8000
3539
# Runtime settings.
# NOTE(review): predict_concurrency presumably caps concurrent predict requests at 16 — confirm against the Truss config reference.
runtime:
3640
predict_concurrency: 16
3741
# Compute resources; this change switches the accelerator from A100 to H100 and keeps GPU use enabled.
resources:
38-
accelerator: A100
42+
accelerator: H100
3943
use_gpu: true
40-
# Display name, changed from "Pixtral Trussless" to "Pixtral 12B".
model_name: Pixtral Trussless
44+
model_name: Pixtral 12B
4145
# Environment variables: VLLM_LOGGING_LEVEL changes from WARNING to INFO, and the hf_access_token: null line is removed.
environment_variables:
42-
VLLM_LOGGING_LEVEL: WARNING
43-
hf_access_token: null
46+
VLLM_LOGGING_LEVEL: INFO
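Second changed file (file name missing from this capture; presumably the chat template that the start command loads from /app/data/pixtral12b.jinja)

Lines changed: 90 additions & 0 deletions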
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
{#- Chat template: the prompt starts with the beginning-of-sequence token. -#}
{{- bos_token }}
2+
{#- Render every message in order, dispatching on its role. -#}
{%- for message in messages %}
3+
{#- User turn: everything emitted for it is wrapped in [INST] ... [/INST]. -#}
{%- if message['role'] == 'user' %}
4+
{{- '[INST]' }}
5+
{#- Content that is not a plain string is iterated as a list of typed chunks. -#}
{%- if message['content'] is not string %}
6+
{%- for chunk in message['content'] %}
7+
{%- if chunk['type'] == 'text' %}
8+
{#- NOTE(review): text is read from chunk['content']; verify that callers supply text chunks under that key. -#}
{{- chunk['content'] }}
9+
{#- Each image chunk contributes one [IMG] placeholder. -#}
{%- elif chunk['type'] == 'image' %}
10+
{{- '[IMG]' }}
11+
{#- Any other chunk type aborts rendering. -#}
{%- else %}
12+
{{- raise_exception('Unrecognized content type!') }}
13+
{%- endif %}
14+
{%- endfor %}
15+
{#- Plain string content is emitted unchanged. -#}
{%- else %}
16+
{{- message['content'] }}
17+
{%- endif %}
18+
{#- Tool definitions are appended only when tools is provided and this is the last message of the conversation loop. -#}
{%- if tools is defined and tools is not none and loop.last %}
19+
{{- '[AVAILABLE_TOOLS][' }}
20+
{%- for tool in tools %}
21+
{#- Rebind tool to its function spec for the rest of this iteration. -#}
{%- set tool = tool.function %}
22+
{{- '{"type": "function", "function": {' }}
23+
{#- Emit every key of the function spec except "return". -#}
{%- for key, val in tool.items() if key != "return" %}
24+
{#- String values are wrapped in quotes by concatenation (no escaping is applied here); all other values go through tojson. -#}
{%- if val is string %}
25+
{{- '"' + key + '": "' + val + '"' }}
26+
{%- else %}
27+
{{- '"' + key + '": ' + val|tojson }}
28+
{%- endif %}
29+
{#- Comma-separate the keys. -#}
{%- if not loop.last %}
30+
{{- ', ' }}
31+
{%- endif %}
32+
{%- endfor %}
33+
{#- Close the inner "function" object and the outer tool object. -#}
{{- '}}' }}
34+
{#- Comma-separate the tools. -#}
{%- if not loop.last %}
35+
{{- ', ' }}
36+
{%- endif %}
37+
{%- endfor %}
38+
{{- ']' }}
39+
{{- '[/AVAILABLE_TOOLS]' }}
40+
{%- endif %}
41+
{{- '[/INST]' }}
42+
{#- System turn: the content is concatenated between [SYSTEM_PROMPT] markers, so it must be a string here. -#}
{%- elif message['role'] == 'system' %}
43+
{{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}
44+
{#- Assistant turn: content is emitted with surrounding whitespace trimmed, then closed with the end-of-sequence token. -#}
{%- elif message['role'] == 'assistant' %}
45+
{#- Non-string content is iterated as typed chunks, as in the user branch. -#}
{%- if message['content'] is not string %}
46+
{%- for chunk in message['content'] %}
47+
{%- if chunk['type'] == 'text' %}
48+
{{- chunk['content']|trim }}
49+
{%- elif chunk['type'] == 'image' %}
50+
{{- '[IMG]' }}
51+
{#- Any other chunk type aborts rendering. -#}
{%- else %}
52+
{{- raise_exception('Unrecognized content type!') }}
53+
{%- endif %}
54+
{%- endfor %}
55+
{%- else %}
56+
{{- message['content']|trim }}
57+
{%- endif %}
58+
{{- eos_token }}
59+
{#- Tool result turn (role "tool_results" or "tool"): rendered as a JSON-like object between [TOOL_RESULTS] markers. -#}
{%- elif message['role'] == 'tool_results' or message['role'] == 'tool' %}
60+
{#- Use the nested message.content.content when it exists, otherwise message.content itself. -#}
{%- if message.content is defined and message.content.content is defined %}
61+
{%- set content = message.content.content %}
62+
{%- else %}
63+
{%- set content = message.content %}
64+
{%- endif %}
65+
{#- The content is stringified and inserted without surrounding quotes. -#}
{{- '[TOOL_RESULTS]{"content": ' + content|string + ', ' }}
66+
{#- When the content is a mapping with an 'image' key, an [IMG] placeholder field is added. -#}
{%- if message.content is mapping and 'image' in message.content %}
67+
{{- '"image": "[IMG]", ' }}
68+
{%- endif %}
69+
{#- A tool_call_id must be present and exactly 9 characters long; only the length is checked here, not the character set. -#}
{%- if not message.tool_call_id is defined or message.tool_call_id|length != 9 %}
70+
{{- raise_exception("Tool call IDs should be alphanumeric strings with length 9!") }}
71+
{%- endif %}
72+
{{- '"call_id": "' + message.tool_call_id + '"}[/TOOL_RESULTS]' }}
73+
{#- Tool call turn: serializes message.tool_calls as a list between [TOOL_CALLS][ and ], followed by the end-of-sequence token. -#}
{#- NOTE(review): a message with role 'assistant' is caught by the earlier role branch, so this branch is only reached when no role check above matched — confirm that is intended. -#}
{%- elif message.tool_calls is defined and message.tool_calls is not none %}
74+
{{- '[TOOL_CALLS][' }}
75+
{%- for tool_call in message.tool_calls %}
76+
{#- Serialize the function object, then drop its last character (the closing brace) so the id can be appended. -#}
{%- set out = tool_call.function|tojson %}
77+
{{- out[:-1] }}
78+
{#- An id must be present and exactly 9 characters long; only the length is checked here. -#}
{%- if not tool_call.id is defined or tool_call.id|length != 9 %}
79+
{{- raise_exception("Tool call IDs should be alphanumeric strings with length 9!") }}
80+
{%- endif %}
81+
{{- ', "id": "' + tool_call.id + '"}' }}
82+
{#- Comma-separate the tool calls. -#}
{%- if not loop.last %}
83+
{{- ', ' }}
84+
{%- endif %}
85+
{%- endfor %}
86+
{{- ']' + eos_token }}
87+
{#- Fallback: a message that matched none of the branches above aborts rendering. -#}
{%- else %}
88+
{{- raise_exception('Only user, system, assistant, tool, and tool_results roles are supported!') }}
89+
{%- endif %}
90+
{#- End of the per-message loop. -#}
{%- endfor %}

0 commit comments

Comments
 (0)