update to ungated model with chat template and newer vllm (#418)

dsingal0 · web-flow · commit fb64e584cf10 · 2025-03-12T18:28:05.000-07:00
diff --git a/custom-server/pixtral-12b/config.yaml b/custom-server/pixtral-12b/config.yaml
@@ -1,5 +1,5 @@
 base_image:
-  image: vllm/vllm-openai:v0.6.1.post1
+  image: vllm/vllm-openai:v0.7.3
 model_metadata:
-  repo_id: mistralai/Pixtral-12B-2409
+  repo_id: mistral-community/pixtral-12b  # same repo that start_command passes to `vllm serve`
   avatar_url: https://cdn.baseten.co/production/static/explore/mistral_logo.png
@@ -26,18 +26,22 @@ model_metadata:
     "max_tokens": 512,
     "temperature": 0.5
   }
+  tags:
+    - openai-compatible
+    - multimodal
+    - text-generation
 docker_server:
-  start_command: sh -c "HF_TOKEN=$(cat /secrets/hf_access_token) vllm serve mistralai/Pixtral-12B-2409 --served-model-name pixtral --tokenizer_mode mistral --limit_mm_per_prompt 'image=4'"
+  # No HF token is passed any more (ungated repo). NOTE(review): assumes data/pixtral12b.jinja ends up at /app/data in the image - confirm.
+  start_command: sh -c "vllm serve mistral-community/pixtral-12b --served-model-name pixtral --max-model-len 65536 --chat-template /app/data/pixtral12b.jinja --chat-template-content-format string --limit_mm_per_prompt 'image=4' --gpu-memory-utilization 0.95"
   readiness_endpoint: /health
   liveness_endpoint: /health
   predict_endpoint: /v1/chat/completions
   server_port: 8000
 runtime:
-  predict_concurrency : 16
+  predict_concurrency: 16
 resources:
-  accelerator: A100
+  accelerator: H100
   use_gpu: true
-model_name: Pixtral Trussless
+model_name: Pixtral 12B
 environment_variables:
-  VLLM_LOGGING_LEVEL: WARNING
-  hf_access_token: null
+  VLLM_LOGGING_LEVEL: INFO
diff --git a/custom-server/pixtral-12b/data/pixtral12b.jinja b/custom-server/pixtral-12b/data/pixtral12b.jinja
@@ -0,0 +1,97 @@
+{#- Chat template for Pixtral 12B: renders `messages` (and optional `tools`) into a single prompt built from bracketed control tokens. -#}
+{#- Every tag uses `-` whitespace control, so these comments and the indentation add nothing to the rendered prompt. -#}
+{{- bos_token }}
+{%- for message in messages %}
+    {%- if message['role'] == 'user' %}
+        {{- '[INST]' }}
+        {%- if message['content'] is not string %}
+            {%- for chunk in message['content'] %}
+                {%- if chunk['type'] == 'text' %}
+                    {#- Accept OpenAI-style chunks ("text" key) as well as the legacy "content" key. -#}
+                    {{- chunk['text'] if 'text' in chunk else chunk['content'] }}
+                {%- elif chunk['type'] == 'image' %}
+                    {{- '[IMG]' }}
+                {%- else %}
+                    {{- raise_exception('Unrecognized content type!') }}
+                {%- endif %}
+            {%- endfor %}
+        {%- else %}
+            {{- message['content'] }}
+        {%- endif %}
+        {#- Tool definitions are emitted only when the final message of the conversation is a user turn. -#}
+        {%- if tools is defined and tools is not none and loop.last %}
+            {{- '[AVAILABLE_TOOLS][' }}
+            {%- for tool in tools %}
+                {%- set tool = tool.function %}
+                {{- '{"type": "function", "function": {' }}
+                {%- for key, val in tool.items() if key != "return" %}
+                    {%- if val is string %}
+                        {{- '"' + key + '": "' + val + '"' }}
+                    {%- else %}
+                        {{- '"' + key + '": ' + val|tojson }}
+                    {%- endif %}
+                    {%- if not loop.last %}
+                        {{- ', ' }}
+                    {%- endif %}
+                {%- endfor %}
+                {{- '}}' }}
+                {%- if not loop.last %}
+                    {{- ', ' }}
+                {%- endif %}
+            {%- endfor %}
+            {{- ']' }}
+            {{- '[/AVAILABLE_TOOLS]' }}
+        {%- endif %}
+        {{- '[/INST]' }}
+    {%- elif message['role'] == 'system' %}
+        {{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}
+    {#- Checked before the plain assistant branch: a tool-calling turn with role "assistant" would otherwise match that branch and never get here. -#}
+    {%- elif message.tool_calls is defined and message.tool_calls %}
+        {{- '[TOOL_CALLS][' }}
+        {%- for tool_call in message.tool_calls %}
+            {#- Drop the closing brace of the serialized function so the call id can be appended inside the same object. -#}
+            {%- set out = tool_call.function|tojson %}
+            {{- out[:-1] }}
+            {%- if not tool_call.id is defined or tool_call.id|length != 9 %}
+                {{- raise_exception("Tool call IDs should be alphanumeric strings with length 9!") }}
+            {%- endif %}
+            {{- ', "id": "' + tool_call.id + '"}' }}
+            {%- if not loop.last %}
+                {{- ', ' }}
+            {%- endif %}
+        {%- endfor %}
+        {{- ']' + eos_token }}
+    {%- elif message['role'] == 'assistant' %}
+        {%- if message['content'] is not string %}
+            {%- for chunk in message['content'] %}
+                {%- if chunk['type'] == 'text' %}
+                    {{- (chunk['text'] if 'text' in chunk else chunk['content'])|trim }}
+                {%- elif chunk['type'] == 'image' %}
+                    {{- '[IMG]' }}
+                {%- else %}
+                    {{- raise_exception('Unrecognized content type!') }}
+                {%- endif %}
+            {%- endfor %}
+        {%- else %}
+            {{- message['content']|trim }}
+        {%- endif %}
+        {{- eos_token }}
+    {%- elif message['role'] == 'tool_results' or message['role'] == 'tool' %}
+        {#- A tool result may nest its payload as content.content; otherwise content itself is the payload. -#}
+        {%- if message.content is defined and message.content.content is defined %}
+            {%- set content = message.content.content %}
+        {%- else %}
+            {%- set content = message.content %}
+        {%- endif %}
+        {{- '[TOOL_RESULTS]{"content": ' + content|string + ', ' }}
+        {%- if message.content is mapping and 'image' in message.content %}
+            {{- '"image": "[IMG]", ' }}
+        {%- endif %}
+        {%- if not message.tool_call_id is defined or message.tool_call_id|length != 9 %}
+            {{- raise_exception("Tool call IDs should be alphanumeric strings with length 9!") }}
+        {%- endif %}
+        {{- '"call_id": "' + message.tool_call_id + '"}[/TOOL_RESULTS]' }}
+    {%- else %}
+        {{- raise_exception('Only user, system, assistant, tool, and tool_results roles are supported!') }}
+    {%- endif %}
+{%- endfor %}