Skip to content

Commit 22e744b

Browse files
authored
[Build] Fix CI and Notebook builds (#940)
Get the CI and Notebook builds working again. The main fix involves authentication to the AzureAI endpoints: we now use an Entra ID rather than an `api_key`. The instructions are [on the Microsoft site](https://learn.microsoft.com/en-us/azure/developer/github/connect-from-azure). Also update the chat templates again, for Phi-3 and Mistral. The fix for Mistral is not ideal: the model now 'supports' a `system` message only by inserting it into the first `user` message, which I don't think our approach can really accommodate.
1 parent b72c8cd commit 22e744b

File tree

10 files changed

+184
-110
lines changed

10 files changed

+184
-110
lines changed

.github/workflows/ci_tests.yml

Lines changed: 18 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,10 @@ on:
1313
# Run at 1030 UTC every day
1414
- cron: '30 10 * * *'
1515

16+
permissions:
17+
id-token: write
18+
contents: read
19+
1620
jobs:
1721
build:
1822

@@ -24,15 +28,15 @@ jobs:
2428

2529
steps:
2630
- uses: actions/checkout@v4
31+
- name: Set up Python ${{ matrix.python-version }}
32+
uses: actions/setup-python@v5
33+
with:
34+
python-version: ${{ matrix.python-version }}
2735
- name: Install Rust
2836
shell: bash
2937
run: |
3038
curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain 1.75.0
3139
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
32-
- name: Set up Python ${{ matrix.python-version }}
33-
uses: actions/setup-python@v5
34-
with:
35-
python-version: ${{ matrix.python-version }}
3640
- name: Show GPUs
3741
run: |
3842
nvidia-smi
@@ -57,15 +61,23 @@ jobs:
5761
- name: Check GPU available
5862
run: |
5963
python -c "import torch; assert torch.cuda.is_available()"
64+
- name: 'Az CLI login'
65+
uses: azure/login@v1
66+
with:
67+
client-id: ${{ secrets.AZURE_CLIENT_ID }}
68+
tenant-id: ${{ secrets.AZURE_TENANT_ID }}
69+
subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
70+
- name: 'Run Azure CLI commands'
71+
run: |
72+
az account show
73+
az group list
6074
- name: Test with pytest
6175
env:
6276
HF_TOKEN: ${{ secrets.HF_TOKEN }}
6377
# Configure endpoints for Azure OpenAI
6478
AZUREAI_CHAT_ENDPOINT: ${{ secrets.AZUREAI_CHAT_ENDPOINT }}
65-
AZUREAI_CHAT_KEY: ${{ secrets.AZUREAI_CHAT_KEY }}
6679
AZUREAI_CHAT_MODEL: ${{ secrets.AZUREAI_CHAT_MODEL }}
6780
AZUREAI_COMPLETION_ENDPOINT: ${{ secrets.AZUREAI_COMPLETION_ENDPOINT }}
68-
AZUREAI_COMPLETION_KEY: ${{ secrets.AZUREAI_COMPLETION_KEY }}
6981
AZUREAI_COMPLETION_MODEL: ${{ secrets.AZUREAI_COMPLETION_MODEL }}
7082
# Configure endpoints for Azure AI Studio
7183
AZURE_AI_STUDIO_PHI3_ENDPOINT: ${{ vars.AZURE_AI_STUDIO_PHI3_ENDPOINT }}

.github/workflows/notebook_tests.yml

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,10 @@ on:
1616
# Run at 0830 UTC every day
1717
- cron: '30 08 * * *'
1818

19+
permissions:
20+
id-token: write
21+
contents: read
22+
1923
jobs:
2024
build:
2125

@@ -60,15 +64,23 @@ jobs:
6064
- name: Check GPU available
6165
run: |
6266
python -c "import torch; assert torch.cuda.is_available()"
67+
- name: 'Az CLI login'
68+
uses: azure/login@v1
69+
with:
70+
client-id: ${{ secrets.AZURE_CLIENT_ID }}
71+
tenant-id: ${{ secrets.AZURE_TENANT_ID }}
72+
subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
73+
- name: 'Run Azure CLI commands'
74+
run: |
75+
az account show
76+
az group list
6377
- name: Test with pytest
6478
env:
6579
HF_TOKEN: ${{ secrets.HF_TOKEN }}
6680
# Configure endpoints for Azure OpenAI
6781
AZUREAI_CHAT_ENDPOINT: ${{ secrets.AZUREAI_CHAT_ENDPOINT }}
68-
AZUREAI_CHAT_KEY: ${{ secrets.AZUREAI_CHAT_KEY }}
6982
AZUREAI_CHAT_MODEL: ${{ secrets.AZUREAI_CHAT_MODEL }}
7083
AZUREAI_COMPLETION_ENDPOINT: ${{ secrets.AZUREAI_COMPLETION_ENDPOINT }}
71-
AZUREAI_COMPLETION_KEY: ${{ secrets.AZUREAI_COMPLETION_KEY }}
7284
AZUREAI_COMPLETION_MODEL: ${{ secrets.AZUREAI_COMPLETION_MODEL }}
7385
# Configure endpoints for Azure AI Studio
7486
AZURE_AI_STUDIO_PHI3_ENDPOINT: ${{ vars.AZURE_AI_STUDIO_PHI3_ENDPOINT }}

guidance/chat.py

Lines changed: 72 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,8 @@ def __contains__(self, key: str):
3737

3838
# Feels weird having to instantiate this, but it's a singleton for all purposes
3939
# TODO [HN]: Add an alias system so we can instantiate with other simple keys (e.g. "llama2" instead of the full template string)
40-
CHAT_TEMPLATE_CACHE = ChatTemplateCache()
40+
CHAT_TEMPLATE_CACHE = ChatTemplateCache()
41+
4142

4243
class UnsupportedRoleException(Exception):
4344
def __init__(self, role_name, instance):
@@ -46,11 +47,12 @@ def __init__(self, role_name, instance):
4647
super().__init__(self._format_message())
4748

4849
def _format_message(self):
49-
return (f"Role {self.role_name} is not supported by the {self.instance.__class__.__name__} chat template. ")
50+
return f"Role {self.role_name} is not supported by the {self.instance.__class__.__name__} chat template. "
51+
5052

5153
def load_template_class(chat_template=None):
5254
"""Utility method to find the best chat template.
53-
55+
5456
Order of precedence:
5557
- If it's a chat template class, use it directly
5658
- If it's a string, check the cache of popular model templates
@@ -60,23 +62,27 @@ def load_template_class(chat_template=None):
6062
"""
6163
if inspect.isclass(chat_template) and issubclass(chat_template, ChatTemplate):
6264
if chat_template is ChatTemplate:
63-
raise Exception("You can't use the base ChatTemplate class directly. Create or use a subclass instead.")
65+
raise Exception(
66+
"You can't use the base ChatTemplate class directly. Create or use a subclass instead."
67+
)
6468
return chat_template
65-
69+
6670
elif isinstance(chat_template, str):
6771
# First check the cache of popular model types
6872
# TODO: Expand keys of cache to include aliases for popular model types (e.g. "llama2, phi3")
6973
# Can possibly accomplish this with an "aliases" dictionary that maps all aliases to the canonical key in cache
7074
if chat_template in CHAT_TEMPLATE_CACHE:
7175
return CHAT_TEMPLATE_CACHE[chat_template]
7276
# TODO: Add logic here to try to auto-create class dynamically via _template_class_from_string method
73-
77+
7478
# Only warn when a user provided a chat template that we couldn't load
7579
if chat_template is not None:
76-
warnings.warn(f"""Chat template {chat_template} was unable to be loaded directly into guidance.
80+
warnings.warn(
81+
f"""Chat template {chat_template} was unable to be loaded directly into guidance.
7782
Defaulting to the ChatML format which may not be optimal for the selected model.
78-
For best results, create and pass in a `guidance.ChatTemplate` subclass for your model.""")
79-
83+
For best results, create and pass in a `guidance.ChatTemplate` subclass for your model."""
84+
)
85+
8086
# By default, use the ChatML Template. Warnings to user will happen downstream only if they use chat roles.
8187
return ChatMLTemplate
8288

@@ -94,15 +100,18 @@ def _template_class_from_string(template_str):
94100
# --------------------------------------------------
95101
# Note that all grammarless models will default to this syntax, since we typically send chat formatted messages.
96102
chatml_template = "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}"
103+
104+
97105
class ChatMLTemplate(ChatTemplate):
98106
template_str = chatml_template
99107

100108
def get_role_start(self, role_name):
101109
return f"<|im_start|>{role_name}\n"
102-
110+
103111
def get_role_end(self, role_name=None):
104112
return "<|im_end|>\n"
105113

114+
106115
CHAT_TEMPLATE_CACHE[chatml_template] = ChatMLTemplate
107116

108117

@@ -111,6 +120,8 @@ def get_role_end(self, role_name=None):
111120
# --------------------------------------------------
112121
# [05/08/24] https://huggingface.co/meta-llama/Llama-2-7b-chat-hf/blob/main/tokenizer_config.json#L12
113122
llama2_template = "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}"
123+
124+
114125
class Llama2ChatTemplate(ChatTemplate):
115126
# available_roles = ["system", "user", "assistant"]
116127
template_str = llama2_template
@@ -124,7 +135,7 @@ def get_role_start(self, role_name):
124135
return " "
125136
else:
126137
raise UnsupportedRoleException(role_name, self)
127-
138+
128139
def get_role_end(self, role_name=None):
129140
if role_name == "system":
130141
return "\n<</SYS>"
@@ -135,6 +146,7 @@ def get_role_end(self, role_name=None):
135146
else:
136147
raise UnsupportedRoleException(role_name, self)
137148

149+
138150
CHAT_TEMPLATE_CACHE[llama2_template] = Llama2ChatTemplate
139151

140152

@@ -143,6 +155,8 @@ def get_role_end(self, role_name=None):
143155
# --------------------------------------------------
144156
# [05/08/24] https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/blob/main/tokenizer_config.json#L2053
145157
llama3_template = "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}"
158+
159+
146160
class Llama3ChatTemplate(ChatTemplate):
147161
# available_roles = ["system", "user", "assistant"]
148162
template_str = llama3_template
@@ -156,52 +170,89 @@ def get_role_start(self, role_name):
156170
return "<|start_header_id|>assistant<|end_header_id|>\n\n"
157171
else:
158172
raise UnsupportedRoleException(role_name, self)
159-
173+
160174
def get_role_end(self, role_name=None):
161175
return "<|eot_id|>"
162176

177+
163178
CHAT_TEMPLATE_CACHE[llama3_template] = Llama3ChatTemplate
164179

165180
# --------------------------------------------------
166181
# @@@@ Phi-3 @@@@
167182
# --------------------------------------------------
168183
# [05/08/24] https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/tokenizer_config.json#L119
169-
phi3_template = "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}"
170-
class Phi3ChatTemplate(ChatTemplate):
184+
phi3_mini_template = "{% for message in messages %}{% if message['role'] == 'system' %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}"
185+
186+
187+
class Phi3MiniChatTemplate(ChatTemplate):
171188
# available_roles = ["user", "assistant"]
172-
template_str = phi3_template
189+
template_str = phi3_mini_template
173190

174191
def get_role_start(self, role_name):
175192
if role_name == "user":
176193
return "<|user|>"
177194
elif role_name == "assistant":
178195
return "<|assistant|>"
196+
elif role_name == "system":
197+
return "<|system|>"
179198
else:
180199
raise UnsupportedRoleException(role_name, self)
181-
200+
182201
def get_role_end(self, role_name=None):
183202
return "<|end|>"
184203

185-
CHAT_TEMPLATE_CACHE[phi3_template] = Phi3ChatTemplate
186204

205+
CHAT_TEMPLATE_CACHE[phi3_mini_template] = Phi3MiniChatTemplate
206+
207+
# https://huggingface.co/microsoft/Phi-3-small-8k-instruct/blob/main/tokenizer_config.json
208+
phi3_small_template = "{{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}"
209+
210+
211+
# https://huggingface.co/microsoft/Phi-3-medium-4k-instruct/blob/main/tokenizer_config.json#L119
212+
phi3_medium_template = "{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}"
213+
214+
215+
# Although the templates are different, the roles are the same between medium and small (for now)
216+
class Phi3SmallMediumChatTemplate(ChatTemplate):
217+
# available_roles = ["user", "assistant"]
218+
template_str = phi3_small_template
219+
220+
def get_role_start(self, role_name):
221+
if role_name == "user":
222+
return "<|user|>"
223+
elif role_name == "assistant":
224+
return "<|assistant|>"
225+
else:
226+
raise UnsupportedRoleException(role_name, self)
227+
228+
def get_role_end(self, role_name=None):
229+
return "<|end|>"
230+
231+
232+
CHAT_TEMPLATE_CACHE[phi3_small_template] = Phi3SmallMediumChatTemplate
233+
CHAT_TEMPLATE_CACHE[phi3_medium_template] = Phi3SmallMediumChatTemplate
187234

188235
# --------------------------------------------------
189236
# @@@@ Mistral-7B-Instruct-v0.2 @@@@
190237
# --------------------------------------------------
191238
# [05/08/24] https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/blob/main/tokenizer_config.json#L42
192-
mistral_7b_instruct_template = "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}"
239+
mistral_7b_instruct_template = "{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}\n {%- endif %}\n {%- if message['role'] == 'user' %}\n {%- if loop.first and system_message is defined %}\n {{- ' [INST] ' + system_message + '\\n\\n' + message['content'] + ' [/INST]' }}\n {%- else %}\n {{- ' [INST] ' + message['content'] + ' [/INST]' }}\n {%- endif %}\n {%- elif message['role'] == 'assistant' %}\n {{- ' ' + message['content'] + eos_token}}\n {%- else %}\n {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}\n {%- endif %}\n{%- endfor %}\n"
240+
241+
193242
class Mistral7BInstructChatTemplate(ChatTemplate):
194243
# available_roles = ["user", "assistant"]
195244
template_str = mistral_7b_instruct_template
196245

197246
def get_role_start(self, role_name):
198247
if role_name == "user":
199-
return "[INST] "
248+
return " [INST] "
200249
elif role_name == "assistant":
201250
return " "
251+
elif role_name == "system":
252+
raise ValueError("Please include system instructions in the first user message")
202253
else:
203254
raise UnsupportedRoleException(role_name, self)
204-
255+
205256
def get_role_end(self, role_name=None):
206257
if role_name == "user":
207258
return " [/INST]"
@@ -210,4 +261,5 @@ def get_role_end(self, role_name=None):
210261
else:
211262
raise UnsupportedRoleException(role_name, self)
212263

264+
213265
CHAT_TEMPLATE_CACHE[mistral_7b_instruct_template] = Mistral7BInstructChatTemplate

guidance/models/_azure_guidance.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
import json
55
import urllib.parse
66
from ._model import Engine, Model, EngineCallResponse
7-
from ..chat import Phi3ChatTemplate
7+
from ..chat import Phi3MiniChatTemplate
88
from ._byte_tokenizer import ByteTokenizer
99

1010

@@ -30,7 +30,7 @@ def __init__(self, server_url, max_streaming_tokens=1000, chat_template=None):
3030

3131
if chat_template is None:
3232
# TODO [PK]: obtain this from the server
33-
chat_template=Phi3ChatTemplate
33+
chat_template=Phi3MiniChatTemplate
3434

3535
tokenizer = ByteTokenizer(chat_template)
3636

notebooks/api_examples/models/AzureOpenAI.ipynb

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@
3636
"import os\n",
3737
"\n",
3838
"# Uncomment if using DefaultAzureCredential below\n",
39-
"# from azure.identity import DefaultAzureCredential, get_bearer_token_provider\n",
39+
"from azure.identity import DefaultAzureCredential, get_bearer_token_provider\n",
4040
"\n",
4141
"# This is the name of the model deployed, such as 'gpt-4' or 'gpt-3.5-turbo\n",
4242
"model = os.getenv(\"AZUREAI_CHAT_MODEL\", \"Please set the model\")\n",
@@ -52,13 +52,13 @@
5252
"azure_api_version = os.getenv(\"AZUREAI_CHAT_API_VERSION\", \"Please set the API version\")\n",
5353
"\n",
5454
"# The environment variable should be set to the API key from the Azure AI playground:\n",
55-
"api_key=os.getenv(\"AZUREAI_CHAT_KEY\", \"Please set API key\")\n",
55+
"# api_key=os.getenv(\"AZUREAI_CHAT_KEY\", \"Please set API key\")\n",
5656
"\n",
5757
"# Alternatively, we can use Entra authentication\n",
58-
"# token_provider = get_bearer_token_provider(\n",
59-
"# DefaultAzureCredential(),\n",
60-
"# \"https://cognitiveservices.azure.com/.default\"\n",
61-
"#)"
58+
"token_provider = get_bearer_token_provider(\n",
59+
" DefaultAzureCredential(),\n",
60+
" \"https://cognitiveservices.azure.com/.default\"\n",
61+
")"
6262
]
6363
},
6464
{
@@ -84,9 +84,9 @@
8484
" azure_deployment=azure_deployment,\n",
8585
" version=azure_api_version,\n",
8686
" # For authentication, use either\n",
87-
" api_key=api_key\n",
87+
" # api_key=api_key\n",
8888
" # or\n",
89-
" # azure_ad_token_provider=token_provider\n",
89+
" azure_ad_token_provider=token_provider\n",
9090
")"
9191
]
9292
},

0 commit comments

Comments (0)