Skip to content

Commit 24f53d4

Browse files
committed
prompt adjustments
1 parent 12498f9 commit 24f53d4

File tree

1 file changed

+24
-20
lines changed

1 file changed

+24
-20
lines changed

examples/evaluation/use-cases/EvalsAPI_Audio_Inputs.ipynb

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -301,25 +301,29 @@
301301
" \"name\": \"Reference answer audio model grader\",\n",
302302
" \"model\": \"gpt-4o-audio-preview\",\n",
303303
" \"input\": [\n",
304-
" {\n",
305-
" \"role\": \"user\",\n",
306-
" \"content\": [\n",
307-
" {\n",
308-
" \"type\": \"input_text\",\n",
309-
" \"text\": \"Evaluate this audio clip to see if it reaches the same conclusion as the reference answer. Score the answer a 1 if it does, 0 if it does not. Reference answer: {{item.official_answer}}\",\n",
310-
" },\n",
311-
" {\n",
312-
" \"type\": \"input_audio\",\n",
313-
" \"input_audio\": {\n",
314-
" \"data\": \"{{ sample.output_audio.data }}\",\n",
315-
" \"format\": \"wav\",\n",
316-
" },\n",
317-
" },\n",
318-
" ],\n",
319-
" },\n",
320-
" ],\n",
304+
" {\n",
305+
" \"role\": \"system\",\n",
306+
" \"content\": 'You are a helpful assistant that evaluates audio clips to judge whether they match a provided reference answer. The audio clip is the model\\'s response to the question. Respond ONLY with a single JSON object matching: {\"steps\":[{\"description\":\"string\",\"conclusion\":\"string\"}],\"result\":number}. Do not include any extra text. result must be a float in [0.0, 1.0].'\n",
307+
" },\n",
308+
" {\n",
309+
" \"role\": \"user\",\n",
310+
" \"content\": [\n",
311+
" {\n",
312+
" \"type\": \"input_text\",\n",
313+
" \"text\": \"Evaluate this audio clip to see if it reaches the same conclusion as the reference answer. Reference answer: {{item.official_answer}}\",\n",
314+
" },\n",
315+
" {\n",
316+
" \"type\": \"input_audio\",\n",
317+
" \"input_audio\": {\n",
318+
" \"data\": \"{{ sample.output_audio.data }}\",\n",
319+
" \"format\": \"wav\",\n",
320+
" },\n",
321+
" },\n",
322+
" ],\n",
323+
" },\n",
324+
" ],\n",
321325
" \"range\": [0, 1],\n",
322-
" \"pass_threshold\": 0.9,\n",
326+
" \"pass_threshold\": 0.6,\n",
323327
"}"
324328
]
325329
},
@@ -385,14 +389,14 @@
385389
"sampling_messages = [\n",
386390
" {\n",
387391
" \"role\": \"system\",\n",
388-
" \"content\": \"You are a helpful and obedient assistant that can answer questions with audio input. You will be given an audio input containing a question and instructions on exactly how to answer. For example, if the user asks for a single word response, then you should only reply with a single word answer.\"\n",
392+
" \"content\": \"You are a helpful and obedient assistant that can answer questions with audio input. You will be given an audio input containing a question to answer.\"\n",
389393
" },\n",
390394
" {\n",
391395
" \"role\": \"user\",\n",
392396
" \"type\": \"message\",\n",
393397
" \"content\": {\n",
394398
" \"type\": \"input_text\",\n",
395-
" \"text\": \"Answer the following question by replying with a single word answer: 'valid' or 'invalid'.\"\n",
399+
" \"text\": \"Answer the following question by replying with brief reasoning statements and a conclusion with a single word answer: 'valid' or 'invalid'.\"\n",
396400
" }\n",
397401
" },\n",
398402
" {\n",

0 commit comments

Comments
 (0)