11import os
2+
23import pytest
4+ from anthropic .types import MessageParam , TextBlock , Usage
35
46from evals .record import DummyRecorder
7+ from evals .solvers .providers .anthropic .anthropic_solver import AnthropicSolver , anth_to_openai_usage
58from evals .task_state import Message , TaskState
6- from evals .solvers .providers .anthropic .anthropic_solver import (
7- AnthropicSolver ,
8- anth_to_openai_usage ,
9- )
10-
11- from anthropic .types import ContentBlock , MessageParam , Usage
129
1310IN_GITHUB_ACTIONS = os .getenv ("GITHUB_ACTIONS" ) == "true"
1411MODEL_NAME = "claude-instant-1.2"
@@ -32,9 +29,7 @@ def dummy_recorder():
3229 yield recorder
3330
3431
35- @pytest .mark .skipif (
36- IN_GITHUB_ACTIONS , reason = "API tests are wasteful to run on every commit."
37- )
32+ @pytest .mark .skipif (IN_GITHUB_ACTIONS , reason = "API tests are wasteful to run on every commit." )
3833def test_solver (dummy_recorder , anthropic_solver ):
3934 """
4035 Test that the solver generates a response coherent with the message history
@@ -55,9 +50,7 @@ def test_solver(dummy_recorder, anthropic_solver):
5550 )
5651
5752 solver_res = solver (task_state = task_state )
58- assert (
59- solver_res .output == answer
60- ), f"Expected '{ answer } ', but got { solver_res .output } "
53+ assert solver_res .output == answer , f"Expected '{ answer } ', but got { solver_res .output } "
6154
6255
6356def test_message_format ():
@@ -71,9 +64,7 @@ def test_message_format():
7164 msgs = [
7265 Message (role = "user" , content = "What is 2 + 2?" ),
7366 Message (role = "system" , content = "reason step by step" ),
74- Message (
75- role = "assistant" , content = "I don't need to reason for this, 2+2 is just 4"
76- ),
67+ Message (role = "assistant" , content = "I don't need to reason for this, 2+2 is just 4" ),
7768 Message (role = "system" , content = "now, given your reasoning, provide the answer" ),
7869 ]
7970 anth_msgs = AnthropicSolver ._convert_msgs_to_anthropic_format (msgs )
@@ -82,24 +73,20 @@ def test_message_format():
8273 MessageParam (
8374 role = "user" ,
8475 content = [
85- ContentBlock (text = "What is 2 + 2?" , type = "text" ),
86- ContentBlock (text = "reason step by step" , type = "text" ),
76+ TextBlock (text = "What is 2 + 2?" , type = "text" ),
77+ TextBlock (text = "reason step by step" , type = "text" ),
8778 ],
8879 ),
8980 MessageParam (
9081 role = "assistant" ,
9182 content = [
92- ContentBlock (
93- text = "I don't need to reason for this, 2+2 is just 4" , type = "text"
94- ),
83+ TextBlock (text = "I don't need to reason for this, 2+2 is just 4" , type = "text" ),
9584 ],
9685 ),
9786 MessageParam (
9887 role = "user" ,
9988 content = [
100- ContentBlock (
101- text = "now, given your reasoning, provide the answer" , type = "text"
102- ),
89+ TextBlock (text = "now, given your reasoning, provide the answer" , type = "text" ),
10390 ],
10491 ),
10592 ]
@@ -126,6 +113,4 @@ def test_anth_to_openai_usage_zero_tokens():
126113 "prompt_tokens" : 0 ,
127114 "total_tokens" : 0 ,
128115 }
129- assert (
130- anth_to_openai_usage (usage ) == expected
131- ), "Zero token cases are not handled correctly."
116+ assert anth_to_openai_usage (usage ) == expected , "Zero token cases are not handled correctly."
0 commit comments