-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy patheval-example.prompt.ts
More file actions
76 lines (68 loc) · 1.82 KB
/
eval-example.prompt.ts
File metadata and controls
76 lines (68 loc) · 1.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
/**
* Example: Testing prompts with Marrakesh SDK
*/
import { pathToFileURL } from "node:url";
import { prompt, tool } from "@marrakesh/core";
import { z } from "zod";
import { openai } from "@ai-sdk/openai";
/**
 * Weather lookup tool made available to the model.
 *
 * Accepts a single `city` string and resolves to a canned report. No real
 * weather service is contacted, so every city receives the same reading.
 */
const getWeather = tool({
  description: "Get the current weather for a city",
  parameters: z.object({ city: z.string().describe("The city name") }),
  // Stand-in for a real API call: echo the city back with fixed values.
  execute: async (args) => ({
    city: args.city,
    temperature: 72,
    conditions: "Sunny",
  }),
});
// User questions paired with the `city` value each one is expected to yield.
const weatherCases = [
  {
    input: "What's the weather in Paris?",
    expect: { city: "Paris" },
    name: "Should extract Paris as city",
  },
  {
    input: "Is it raining in Tokyo?",
    expect: { city: "Tokyo" },
    name: "Should extract Tokyo as city",
  },
  {
    input: "Tell me the temperature in New York",
    expect: { city: "New York" },
    name: "Should handle multi-word city names",
  },
];

/**
 * Weather assistant prompt wired to the `getWeather` tool, with its test
 * suite attached.
 *
 * The suite holds three named cases and a single executor (OpenAI "gpt-4",
 * `maxSteps: 3`).
 * NOTE(review): `expect` is presumably compared against the arguments of the
 * tool call the model makes; confirm against the SDK's matching rules.
 */
export const weatherAgent = prompt("You are a helpful weather assistant")
  .tool(getWeather)
  .test({
    cases: weatherCases,
    executors: [{ model: openai("gpt-4"), maxSteps: 3 }],
  });
// Run the tests only when this file is the process entry point, not when it
// is imported by another module.
//
// The script path is converted with pathToFileURL before being compared to
// import.meta.url. Hand-building the URL as `file://${process.argv[1]}` never
// matches on Windows (drive letters and backslashes) or for paths containing
// characters that are percent-encoded in URLs, such as spaces, which would
// silently skip the run.
const entryScriptPath = process.argv[1];
if (
  entryScriptPath !== undefined &&
  import.meta.url === pathToFileURL(entryScriptPath).href
) {
  const results = await weatherAgent.run();
  console.log(`\nTests: ${results.passed}/${results.total} passed`);
  console.log(`Duration: ${results.duration}ms\n`);
  // Exit non-zero when any case failed so callers (e.g. CI) can detect it.
  process.exit(results.failed > 0 ? 1 : 0);
}
/**
 * Example: a single ad-hoc evaluation.
 *
 * Sends "Hello" to a "Translate to French" prompt using the OpenAI
 * "gpt-4-turbo" model with "Bonjour" as the expectation, logs the outcome
 * and hands it back to the caller.
 *
 * @returns Whatever the SDK's `eval` call resolves to.
 */
export async function runSingleEval() {
  const translator = prompt("Translate to French");
  const model = openai("gpt-4-turbo");

  const result = await translator.eval("Hello", {
    executor: { model },
    expect: "Bonjour",
  });

  console.log("Eval result:", result);
  return result;
}