|
| 1 | +package spectest |
| 2 | + |
| 3 | +import ( |
| 4 | + "context" |
| 5 | + "testing" |
| 6 | + "time" |
| 7 | +) |
| 8 | + |
| 9 | +// TestSpec_SummarizeCommunicationsAcrossSources seeds emails and WhatsApp |
| 10 | +// messages from the same person, then asks the agent to summarize all |
| 11 | +// communications. The agent must autonomously discover and use both email-read |
| 12 | +// and whatsapp-read skills, query both databases, and synthesize the results |
| 13 | +// in a single turn. |
| 14 | +func TestSpec_SummarizeCommunicationsAcrossSources(t *testing.T) { |
| 15 | + EachProvider(t, func(t *testing.T, fx *LocalFixture) { |
| 16 | + fx.GivenEmails(t, []Email{ |
| 17 | + {From: "alice@acme.com", To: "me@example.com", Subject: "Q3 Budget Review", Body: "Hi, please review the Q3 budget spreadsheet I shared. We need to finalize numbers by Friday."}, |
| 18 | + {From: "alice@acme.com", To: "me@example.com", Subject: "Team Offsite in Portland", Body: "I'm thinking we do the offsite in Portland in October. Thoughts?"}, |
| 19 | + }) |
| 20 | + |
| 21 | + fx.GivenWhatsAppMessages(t, []WhatsAppMessage{ |
| 22 | + {SenderJID: "alice@s.whatsapp.net", SenderName: "Alice", ChatJID: "alice@s.whatsapp.net", ChatName: "Alice", Text: "Booked Trattoria Vecchia for Friday dinner, confirmation code TRV-8842."}, |
| 23 | + {SenderJID: "alice@s.whatsapp.net", SenderName: "Alice", ChatJID: "alice@s.whatsapp.net", ChatName: "Alice", Text: "Can you bring the Nakamura prototype to the offsite? Serial number NK-2047."}, |
| 24 | + }) |
| 25 | + |
| 26 | + a := fx.Agent(t) |
| 27 | + ctx, cancel := context.WithTimeout(context.Background(), 180*time.Second) |
| 28 | + defer cancel() |
| 29 | + |
| 30 | + prompt := "Summarize all communications from Alice across both email and WhatsApp." |
| 31 | + result, err := a.Run(ctx, prompt) |
| 32 | + if err != nil { |
| 33 | + t.Fatalf("agent.Run: %v", err) |
| 34 | + } |
| 35 | + |
| 36 | + AssertNotEmpty(t, result) |
| 37 | + AssertJudge(t, fx.Provider, fx.Model, prompt, result, |
| 38 | + "The response must cover topics from BOTH email and WhatsApp. "+ |
| 39 | + "It should mention the Q3 budget review or Portland offsite from email, AND reference "+ |
| 40 | + "Trattoria Vecchia, TRV-8842, Nakamura prototype, or NK-2047 from WhatsApp. "+ |
| 41 | + "It should not claim that data from one source is missing if it was provided.") |
| 42 | + }) |
| 43 | +} |
| 44 | + |
| 45 | +// TestSpec_RecallMemoryAndCorrelateEmails seeds personal memories about a |
| 46 | +// relationship and emails with project details. The agent must autonomously |
| 47 | +// check memories for context about the person, search emails for specifics, |
| 48 | +// and combine both into a coherent answer in a single turn. |
| 49 | +func TestSpec_RecallMemoryAndCorrelateEmails(t *testing.T) { |
| 50 | + EachProvider(t, func(t *testing.T, fx *LocalFixture) { |
| 51 | + fx.GivenMemories(t, []UserMemory{ |
| 52 | + {Content: "Raj Patel is my tech lead at Zephyr Industries", Category: "relationship"}, |
| 53 | + {Content: "Project Firebird has a hard deadline of June 15, 2025", Category: "project"}, |
| 54 | + }) |
| 55 | + |
| 56 | + fx.GivenEmails(t, []Email{ |
| 57 | + {From: "raj.patel@zephyr.io", To: "me@example.com", Subject: "Project Firebird Sprint 7 Retro", Body: "Sprint 7 retro is scheduled for May 22. Please prepare your notes on the auth module refactor."}, |
| 58 | + {From: "raj.patel@zephyr.io", To: "me@example.com", Subject: "Project Firebird Launch Prep", Body: "Client confirmed the staging demo for June 10. We need all QA passed by June 8."}, |
| 59 | + }) |
| 60 | + |
| 61 | + a := fx.Agent(t) |
| 62 | + ctx, cancel := context.WithTimeout(context.Background(), 180*time.Second) |
| 63 | + defer cancel() |
| 64 | + |
| 65 | + prompt := "Tell me everything about Raj and Project Firebird. Check both my memories and emails." |
| 66 | + result, err := a.Run(ctx, prompt) |
| 67 | + if err != nil { |
| 68 | + t.Fatalf("agent.Run: %v", err) |
| 69 | + } |
| 70 | + |
| 71 | + AssertNotEmpty(t, result) |
| 72 | + AssertJudge(t, fx.Provider, fx.Model, prompt, result, |
| 73 | + "The response must include information from BOTH memories and emails. "+ |
| 74 | + "It should mention Raj Patel is the tech lead at Zephyr Industries (from memory) AND mention "+ |
| 75 | + "email subjects or content about Project Firebird Sprint 7 and Launch Prep (from emails). "+ |
| 76 | + "It should not only use one source.") |
| 77 | + }) |
| 78 | +} |
0 commit comments