
Commit 86d6202

Much more testing
1 parent a840ee4 commit 86d6202

4 files changed: 230 additions & 36 deletions

.github/workflows/python.yaml

Lines changed: 1 addition & 1 deletion
@@ -30,4 +30,4 @@ jobs:
         run: black . --check --verbose
       - name: Run unit tests
         run: |
-          python3 -m pytest
+          python3 -m pytest -s -vv --cov --cov-fail-under=99

tests/messages.py

Lines changed: 87 additions & 16 deletions
@@ -1,19 +1,3 @@
-user_message = {
-    "message": {
-        "role": "user",
-        "content": "Hello, how are you?",
-    },
-    "count": 13,
-}
-
-user_message_unicode = {
-    "message": {
-        "role": "user",
-        "content": "á",
-    },
-    "count": 8,
-}
-
 system_message_short = {
     "message": {
         "role": "system",
@@ -30,6 +14,14 @@
     "count": 25,
 }
 
+system_message_long = {
+    "message": {
+        "role": "system",
+        "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.",
+    },
+    "count": 31,
+}
+
 system_message_unicode = {
     "message": {
         "role": "system",
@@ -47,6 +39,68 @@
     "count": 20,  # Less tokens in older vision preview models
 }
 
+user_message = {
+    "message": {
+        "role": "user",
+        "content": "Hello, how are you?",
+    },
+    "count": 13,
+}
+
+user_message_unicode = {
+    "message": {
+        "role": "user",
+        "content": "á",
+    },
+    "count": 8,
+}
+
+user_message_perf = {
+    "message": {
+        "role": "user",
+        "content": "What happens in a performance review?",
+    },
+    "count": 14,
+}
+
+assistant_message_perf = {
+    "message": {
+        "role": "assistant",
+        "content": "During the performance review at Contoso Electronics, the supervisor will discuss the employee's performance over the past year and provide feedback on areas for improvement. They will also provide an opportunity for the employee to discuss their goals and objectives for the upcoming year. The review is a two-way dialogue between managers and employees, and employees will receive a written summary of their performance review which will include a rating of their performance, feedback, and goals and objectives for the upcoming year [employee_handbook-3.pdf].",
+    },
+    "count": 106,
+}
+
+assistant_message_perf_short = {
+    "message": {
+        "role": "assistant",
+        "content": "The supervisor will discuss the employee's performance and provide feedback on areas for improvement. They will also provide an opportunity for the employee to discuss their goals and objectives for the upcoming year. The review is a two-way dialogue between managers and employees, and employees will receive a written summary of their performance review which will include a rating of their performance, feedback, and goals for the upcoming year [employee_handbook-3.pdf].",
+    },
+    "count": 91,
+}
+
+user_message_dresscode = {
+    "message": {
+        "role": "user",
+        "content": "Is there a dress code?",
+    },
+    "count": 13,
+}
+
+assistant_message_dresscode = {
+    "message": {
+        "role": "assistant",
+        "content": "Yes, there is a dress code at Contoso Electronics. Look sharp! [employee_handbook-1.pdf]",
+    },
+    "count": 30,
+}
+user_message_pm = {
+    "message": {
+        "role": "user",
+        "content": "What does a Product Manager do?",
+    },
+    "count": 14,
+}
 text_and_image_message = {
     "message": {
         "role": "user",
@@ -63,3 +117,20 @@
     },
     "count": 266,
 }
+
+MESSAGE_COUNTS = [
+    system_message,
+    system_message_short,
+    system_message_long,
+    system_message_unicode,
+    system_message_with_name,
+    user_message,
+    user_message_unicode,
+    user_message_perf,
+    user_message_dresscode,
+    user_message_pm,
+    assistant_message_perf,
+    assistant_message_perf_short,
+    assistant_message_dresscode,
+    text_and_image_message,
+]
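The new MESSAGE_COUNTS list gathers every (message, expected token count) fixture in one place, which is what lets tests/verify_openai.py below drop its hand-written list. As a rough illustration, the same list could also drive a parametrized unit test. The sketch below is not part of this commit; it assumes the stored counts correspond to the gpt-35-turbo tokenizer used by the existing tests, and the image fixture's own comment notes that counts can vary by model, so a real test would likely pick the model per fixture.

    import pytest
    from openai_messages_token_helper import count_tokens_for_message

    from .messages import MESSAGE_COUNTS


    # Sketch only: check each shared fixture against its stored count.
    @pytest.mark.parametrize("pair", MESSAGE_COUNTS)
    def test_fixture_counts(pair):
        assert count_tokens_for_message("gpt-35-turbo", pair["message"]) == pair["count"]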

tests/test_messagebuilder.py

Lines changed: 140 additions & 1 deletion
@@ -1,7 +1,20 @@
 import pytest
 from openai_messages_token_helper import build_messages, count_tokens_for_message
 
-from .messages import system_message_short, system_message_unicode, user_message, user_message_unicode
+from .messages import (
+    assistant_message_dresscode,
+    assistant_message_perf,
+    assistant_message_perf_short,
+    system_message_long,
+    system_message_short,
+    system_message_unicode,
+    text_and_image_message,
+    user_message,
+    user_message_dresscode,
+    user_message_perf,
+    user_message_pm,
+    user_message_unicode,
+)
 
 
 def test_messagebuilder():
@@ -10,6 +23,15 @@ def test_messagebuilder():
     assert count_tokens_for_message("gpt-35-turbo", messages[0]) == system_message_short["count"]
 
 
+def test_messagebuilder_imagemessage():
+    messages = build_messages(
+        "gpt-35-turbo",
+        system_message_short["message"]["content"],
+        new_user_message=text_and_image_message["message"]["content"],
+    )
+    assert messages == [system_message_short["message"], text_and_image_message["message"]]
+
+
 def test_messagebuilder_append():
     messages = build_messages(
         "gpt-35-turbo", system_message_short["message"]["content"], new_user_message=user_message["message"]["content"]
@@ -55,3 +77,120 @@ def test_messagebuilder_model_fallback():
     assert messages == [system_message_short["message"], user_message["message"]]
     assert count_tokens_for_message(model, messages[0], default_to_cl100k=True) == system_message_short["count"]
     assert count_tokens_for_message(model, messages[1], default_to_cl100k=True) == user_message["count"]
+
+
+def test_messagebuilder_pastmessages():
+    messages = build_messages(
+        model="gpt-35-turbo",
+        system_prompt=system_message_short["message"]["content"],  # 12 tokens
+        past_messages=[
+            user_message_perf["message"],  # 14 tokens
+            assistant_message_perf["message"],  # 106 tokens
+        ],
+        new_user_message=user_message_pm["message"]["content"],  # 14 tokens
+        max_tokens=3000,
+    )
+    assert messages == [
+        system_message_short["message"],
+        user_message_perf["message"],
+        assistant_message_perf["message"],
+        user_message_pm["message"],
+    ]
+
+
+def test_messagebuilder_pastmessages_truncated():
+    messages = build_messages(
+        model="gpt-35-turbo",
+        system_prompt=system_message_short["message"]["content"],  # 12 tokens
+        past_messages=[
+            user_message_perf["message"],  # 14 tokens
+            assistant_message_perf["message"],  # 106 tokens
+        ],
+        new_user_message=user_message_pm["message"]["content"],  # 14 tokens
+        max_tokens=10,
+    )
+    assert messages == [system_message_short["message"], user_message_pm["message"]]
+
+
+def test_messagebuilder_pastmessages_truncated_longer():
+    messages = build_messages(
+        model="gpt-35-turbo",
+        system_prompt=system_message_short["message"]["content"],  # 12 tokens
+        past_messages=[
+            user_message_perf["message"],  # 14 tokens
+            assistant_message_perf["message"],  # 106 tokens
+            user_message_dresscode["message"],  # 13 tokens
+            assistant_message_dresscode["message"],  # 30 tokens
+        ],
+        new_user_message=user_message_pm["message"]["content"],  # 14 tokens
+        max_tokens=69,
+    )
+    assert messages == [
+        system_message_short["message"],
+        user_message_dresscode["message"],
+        assistant_message_dresscode["message"],
+        user_message_pm["message"],
+    ]
+
+
+def test_messagebuilder_pastmessages_truncated_break_pair():
+    """Tests that the truncation breaks the pair of messages."""
+    messages = build_messages(
+        model="gpt-35-turbo",
+        system_prompt=system_message_short["message"]["content"],  # 12 tokens
+        past_messages=[
+            user_message_perf["message"],  # 14 tokens
+            assistant_message_perf_short["message"],  # 91 tokens
+            user_message_dresscode["message"],  # 13 tokens
+            assistant_message_dresscode["message"],  # 30 tokens
+        ],
+        new_user_message=user_message_pm["message"]["content"],  # 14 tokens
+        max_tokens=160,
+    )
+    assert messages == [
+        system_message_short["message"],
+        assistant_message_perf_short["message"],
+        user_message_dresscode["message"],
+        assistant_message_dresscode["message"],
+        user_message_pm["message"],
+    ]
+
+
+def test_messagebuilder_system():
+    """Tests that the system message token count is considered."""
+    messages = build_messages(
+        model="gpt-35-turbo",
+        system_prompt=system_message_long["message"]["content"],  # 31 tokens
+        past_messages=[
+            user_message_perf["message"],  # 14 tokens
+            assistant_message_perf["message"],  # 106 tokens
+            user_message_dresscode["message"],  # 13 tokens
+            assistant_message_dresscode["message"],  # 30 tokens
+        ],
+        new_user_message=user_message_pm["message"]["content"],  # 14 tokens
+        max_tokens=36,
+    )
+    assert messages == [system_message_long["message"], user_message_pm["message"]]
+
+
+def test_messagebuilder_system_fewshots():
+    messages = build_messages(
+        model="gpt-35-turbo",
+        system_prompt=system_message_short["message"]["content"],
+        new_user_message=user_message_pm["message"]["content"],
+        past_messages=[],
+        few_shots=[
+            {"role": "user", "content": "How did crypto do last year?"},
+            {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"},
+            {"role": "user", "content": "What are my health plans?"},
+            {"role": "assistant", "content": "Show available health plans"},
+        ],
+    )
+    # Make sure messages are in the right order
+    assert messages[0]["role"] == "system"
+    assert messages[1]["role"] == "user"
+    assert messages[2]["role"] == "assistant"
+    assert messages[3]["role"] == "user"
+    assert messages[4]["role"] == "assistant"
+    assert messages[5]["role"] == "user"
+    assert messages[5]["content"] == user_message_pm["message"]["content"]
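Read together, the new truncation tests pin down how build_messages behaves: the system prompt and the newest user message are always kept, and past messages are dropped oldest-first until the conversation fits under max_tokens, even if that splits a user/assistant pair. A minimal standalone sketch of that usage follows; the prompt text and the 80-token budget are illustrative, not taken from this commit.

    from openai_messages_token_helper import build_messages, count_tokens_for_message

    # Illustrative history; the oldest turns are the first to be dropped.
    past = [
        {"role": "user", "content": "What happens in a performance review?"},
        {"role": "assistant", "content": "The supervisor discusses performance and goals."},
        {"role": "user", "content": "Is there a dress code?"},
        {"role": "assistant", "content": "Yes, look sharp!"},
    ]

    messages = build_messages(
        model="gpt-35-turbo",
        system_prompt="You are a helpful assistant.",
        past_messages=past,
        new_user_message="What does a Product Manager do?",
        max_tokens=80,  # assumed budget, small enough to force truncation
    )

    # Rough size check with the same per-message helper the tests rely on.
    total = sum(count_tokens_for_message("gpt-35-turbo", m) for m in messages)
    print(len(messages), total)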

tests/verify_openai.py

Lines changed: 2 additions & 18 deletions
@@ -3,15 +3,7 @@
 import azure.identity
 import openai
 from dotenv import load_dotenv
-from messages import (
-    system_message,
-    system_message_short,
-    system_message_unicode,
-    system_message_with_name,
-    text_and_image_message,
-    user_message,
-    user_message_unicode,
-)
+from messages import MESSAGE_COUNTS
 
 # Setup the OpenAI client to use either Azure OpenAI or OpenAI API
 load_dotenv()
@@ -32,15 +24,7 @@
 MODEL_NAME = os.getenv("OPENAI_MODEL")
 
 # Test the token count for each message
-for message_count_pair in [
-    user_message,
-    user_message_unicode,
-    system_message,
-    system_message_short,
-    system_message_unicode,
-    system_message_with_name,
-    text_and_image_message,
-]:
+for message_count_pair in MESSAGE_COUNTS:
     response = client.chat.completions.create(
         model=MODEL_NAME,
         temperature=0.7,
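The hunks above only show the import and the loop header, so the body of the verification loop is not visible here. Presumably it sends each fixture message to the model and compares the stored count against the token usage reported by the API. A hedged sketch of what such a body could look like: response.usage.prompt_tokens is the standard field in the OpenAI Python client, while the max_tokens value and the print format are assumptions, not code from this script.

    for message_count_pair in MESSAGE_COUNTS:
        response = client.chat.completions.create(
            model=MODEL_NAME,
            temperature=0.7,
            messages=[message_count_pair["message"]],
            max_tokens=1,  # assumption: keep the completion tiny, only the prompt count matters
        )
        expected = message_count_pair["count"]
        actual = response.usage.prompt_tokens  # prompt size reported by the API
        print(f"{message_count_pair['message']['role']}: expected={expected} actual={actual}")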
