Skip to content

Commit bb61481

Browse files
committed
Added hallucination test
1 parent 0823caa commit bb61481

File tree

1 file changed

+50
-40
lines changed

1 file changed

+50
-40
lines changed

tests/test_interpreter.py

Lines changed: 50 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,56 @@
2424
from websocket import create_connection
2525

2626

27+
def test_hallucinations():
28+
# We should be resilient to common hallucinations.
29+
30+
code = """10+12executeexecute\n"""
31+
32+
interpreter.messages = [
33+
{"role": "assistant", "type": "code", "format": "python", "content": code}
34+
]
35+
for chunk in interpreter._respond_and_store():
36+
if chunk.get("format") == "output":
37+
assert chunk.get("content") == "22"
38+
break
39+
40+
code = """{
41+
"language": "python",
42+
"code": "10+12"
43+
}"""
44+
45+
interpreter.messages = [
46+
{"role": "assistant", "type": "code", "format": "python", "content": code}
47+
]
48+
for chunk in interpreter._respond_and_store():
49+
if chunk.get("format") == "output":
50+
assert chunk.get("content") == "22"
51+
break
52+
53+
code = """functions.execute({
54+
"language": "python",
55+
"code": "10+12"
56+
})"""
57+
58+
interpreter.messages = [
59+
{"role": "assistant", "type": "code", "format": "python", "content": code}
60+
]
61+
for chunk in interpreter._respond_and_store():
62+
if chunk.get("format") == "output":
63+
assert chunk.get("content") == "22"
64+
break
65+
66+
code = """{language: "python", code: "print('hello')" }"""
67+
68+
interpreter.messages = [
69+
{"role": "assistant", "type": "code", "format": "python", "content": code}
70+
]
71+
for chunk in interpreter._respond_and_store():
72+
if chunk.get("format") == "output":
73+
assert chunk.get("content").strip() == "hello"
74+
break
75+
76+
2777
def run_auth_server():
2878
os.environ["INTERPRETER_REQUIRE_ACKNOWLEDGE"] = "True"
2979
os.environ["INTERPRETER_API_KEY"] = "testing"
@@ -602,46 +652,6 @@ async def test_fastapi_server():
602652
process.join()
603653

604654

605-
def test_hallucinations():
606-
# We should be resilient to common hallucinations.
607-
608-
code = """{
609-
"language": "python",
610-
"code": "10+12"
611-
}"""
612-
613-
interpreter.messages = [
614-
{"role": "assistant", "type": "code", "format": "python", "content": code}
615-
]
616-
for chunk in interpreter._respond_and_store():
617-
if chunk.get("format") == "output":
618-
assert chunk.get("content") == "22"
619-
break
620-
621-
code = """functions.execute({
622-
"language": "python",
623-
"code": "10+12"
624-
})"""
625-
626-
interpreter.messages = [
627-
{"role": "assistant", "type": "code", "format": "python", "content": code}
628-
]
629-
for chunk in interpreter._respond_and_store():
630-
if chunk.get("format") == "output":
631-
assert chunk.get("content") == "22"
632-
break
633-
634-
code = """{language: "python", code: "print('hello')" }"""
635-
636-
interpreter.messages = [
637-
{"role": "assistant", "type": "code", "format": "python", "content": code}
638-
]
639-
for chunk in interpreter._respond_and_store():
640-
if chunk.get("format") == "output":
641-
assert chunk.get("content").strip() == "hello"
642-
break
643-
644-
645655
@pytest.mark.skip(reason="Mac only")
646656
def test_sms():
647657
sms = interpreter.computer.sms

0 commit comments

Comments
 (0)