Skip to content

Commit 04074b9

Browse files
author
Dariusz Debowczyk
committed
test: add agent event payload regression coverage
1 parent 1d0e72e commit 04074b9

File tree

2 files changed

+148
-1
lines changed

2 files changed

+148
-1
lines changed

.beads/issues.jsonl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@
219219
{"id":"instructor-php-9sa","title":"Implement BedrockDriver extending OpenAICompatibleDriver","description":"Create dedicated Bedrock driver that extends OpenAI compatibility with Bedrock-specific features like region endpoints, guardrails, and API key authentication","status":"closed","priority":1,"issue_type":"task","created_at":"2025-11-28T22:58:14.10449701+01:00","updated_at":"2025-11-28T23:23:19.622439+01:00","closed_at":"2025-11-28T23:23:19.622439+01:00","dependencies":[{"issue_id":"instructor-php-9sa","depends_on_id":"instructor-php-hzv","type":"blocks","created_at":"2025-11-28T22:58:24.082237238+01:00","created_by":"daemon","metadata":"{}"},{"issue_id":"instructor-php-9sa","depends_on_id":"instructor-php-9hh","type":"blocks","created_at":"2025-11-28T22:58:24.14243328+01:00","created_by":"daemon","metadata":"{}"},{"issue_id":"instructor-php-9sa","depends_on_id":"instructor-php-d8h","type":"blocks","created_at":"2025-11-28T22:58:24.205930645+01:00","created_by":"daemon","metadata":"{}"},{"issue_id":"instructor-php-9sa","depends_on_id":"instructor-php-lh5","type":"parent-child","created_at":"2025-11-28T22:58:33.405902411+01:00","created_by":"daemon","metadata":"{}"},{"issue_id":"instructor-php-9sa","depends_on_id":"instructor-php-ish","type":"blocks","created_at":"2025-11-28T23:15:09.668831077+01:00","created_by":"daemon","metadata":"{}"},{"issue_id":"instructor-php-9sa","depends_on_id":"instructor-php-5sw","type":"blocks","created_at":"2025-11-28T23:15:09.690170662+01:00","created_by":"daemon","metadata":"{}"}]}
220220
{"id":"instructor-php-9xz","title":"Fix ClaudeRequest constructor parameter type issues","description":"ClaudeRequest.php:21 has 'private PathList $additionalDirs = null' but PathList can't be null. Should be 'private ?PathList $additionalDirs = null' or use PathList::none() as default. Also missing validation for required fields like prompt.","status":"closed","priority":1,"issue_type":"bug","created_at":"2025-12-02T03:27:14.079466279+01:00","updated_at":"2025-12-02T03:38:10.29625378+01:00","closed_at":"2025-12-02T03:38:10.296258469+01:00","dependencies":[{"issue_id":"instructor-php-9xz","depends_on_id":"instructor-php-3y5","type":"blocks","created_at":"2025-12-02T03:27:50.002075904+01:00","created_by":"daemon","metadata":"{}"}]}
221221
{"id":"instructor-php-ao3","title":"Catalog Claude CLI commands and flags","description":"Extract CLI command set, non-interactive options, permission-mode flags, IO formats, and agent/system-prompt customization capabilities from CLI reference to inform controlled execution design.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-02T03:14:30.970943622+01:00","updated_at":"2025-12-02T03:17:56.560064377+01:00","closed_at":"2025-12-02T03:17:56.560067232+01:00"}
222-
{"id":"instructor-php-aquf","title":"Final cleanup + event payload regression test","description":"## Scope\r\n\r\nFinal cleanup pass: delete all dead code, verify no stale references, add event payload regression test, verify green suite.\r\n\r\n## Files to check\r\n\r\n- Grep for: `StepRecorder`, `ErrorRecorder`, `ErrorRecordingResult`, `CanReportObserverState`, `observerState()`, `applyObserverState`, `withNewStepExecution`, `withStepInProgressCleared`, `currentStepNumber` — all should return zero hits\r\n- `AgentErrorContextResolver` — verify no references to deleted continuation types\r\n- `AgentEventBroadcaster` — verify compatible with StopSignal-based events (it already uses `$event-\u003estopSignal` — should be fine)\r\n- `ContinuationEvaluated` event — verify payload fields match what AgentLoop now emits\r\n\r\n## Acceptance criteria\r\n\r\n- Zero grep hits for all deleted type names\r\n- Full test suite green: `vendor/bin/pest packages/agents/tests/`\r\n- New test: `AgentEventPayloadRegressionTest` — runs a multi-step agent scenario via DeterministicAgentDriver, captures events via wiretap, asserts:\r\n - `continuationEvaluated` events contain `stepNumber`, `stopSignal` (with `reason`, `source`, `message`)\r\n - `executionFinished` contains correct `status` and `stopReason`\r\n - Step numbering is sequential\r\n - Error path emits `executionFailed` with correct exception data\r\n\r\n## Validations / checks / tests\r\n\r\n- `vendor/bin/pest packages/agents/tests/` — all green\r\n- New regression test covers happy path + error path + stop exception path\r\n- `grep -r 'StepRecorder\\|ErrorRecorder\\|CanReportObserverState\\|applyObserverState\\|withNewStepExecution\\|withStepInProgressCleared' packages/agents/src/` returns empty\r\n","status":"open","priority":2,"issue_type":"task","created_at":"2026-01-30T13:33:28.310661+01:00","created_by":"Dariusz 
Debowczyk","updated_at":"2026-01-30T13:33:28.310661+01:00","labels":["agents","refactor","state-flow","testing"]}
222+
{"id":"instructor-php-aquf","title":"Final cleanup + event payload regression test","description":"## Scope\r\n\r\nFinal cleanup pass: delete all dead code, verify no stale references, add event payload regression test, verify green suite.\r\n\r\n## Files to check\r\n\r\n- Grep for: `StepRecorder`, `ErrorRecorder`, `ErrorRecordingResult`, `CanReportObserverState`, `observerState()`, `applyObserverState`, `withNewStepExecution`, `withStepInProgressCleared`, `currentStepNumber` — all should return zero hits\r\n- `AgentErrorContextResolver` — verify no references to deleted continuation types\r\n- `AgentEventBroadcaster` — verify compatible with StopSignal-based events (it already uses `$event-\u003estopSignal` — should be fine)\r\n- `ContinuationEvaluated` event — verify payload fields match what AgentLoop now emits\r\n\r\n## Acceptance criteria\r\n\r\n- Zero grep hits for all deleted type names\r\n- Full test suite green: `vendor/bin/pest packages/agents/tests/`\r\n- New test: `AgentEventPayloadRegressionTest` — runs a multi-step agent scenario via DeterministicAgentDriver, captures events via wiretap, asserts:\r\n - `continuationEvaluated` events contain `stepNumber`, `stopSignal` (with `reason`, `source`, `message`)\r\n - `executionFinished` contains correct `status` and `stopReason`\r\n - Step numbering is sequential\r\n - Error path emits `executionFailed` with correct exception data\r\n\r\n## Validations / checks / tests\r\n\r\n- `vendor/bin/pest packages/agents/tests/` — all green\r\n- New regression test covers happy path + error path + stop exception path\r\n- `grep -r 'StepRecorder\\|ErrorRecorder\\|CanReportObserverState\\|applyObserverState\\|withNewStepExecution\\|withStepInProgressCleared' packages/agents/src/` returns empty\r\n","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-30T13:33:28.310661+01:00","created_by":"Dariusz 
Debowczyk","updated_at":"2026-02-21T21:11:07.852478+01:00","closed_at":"2026-02-21T21:11:07.852478+01:00","close_reason":"Added AgentEventPayloadRegressionTest covering continuation payloads, stop-signal fields, completed/stopped statuses, and executionFailed exception payload; verified dead-symbol grep is clean and full agents suite is green.","labels":["agents","refactor","state-flow","testing"]}
223223
{"id":"instructor-php-bn3","title":"Move TEST_MATRIX.md to docs-internal/testing/","description":"Move TEST_MATRIX.md from root to docs-internal/testing/TEST_MATRIX.md and update any references in other documentation files.","status":"closed","priority":1,"issue_type":"task","created_at":"2025-11-28T12:26:16.601850114+01:00","updated_at":"2025-11-28T12:39:52.16898705+01:00","closed_at":"2025-11-28T12:39:52.16898705+01:00","dependencies":[{"issue_id":"instructor-php-bn3","depends_on_id":"instructor-php-ypb","type":"parent-child","created_at":"2025-11-28T12:26:35.08349776+01:00","created_by":"daemon","metadata":"{}"}]}
224224
{"id":"instructor-php-bqut","title":"Implement BeforeToolMiddleware","description":"Create packages/addons/src/Agent/Core/Middleware/BeforeToolMiddleware.php\n\nWraps a callable to run before tool execution. Features:\n- Constructor takes Closure callback and optional HookMatcher\n- Skip execution when matcher fails\n- Callback can return:\n - null → block the call (return AgentExecution::blocked())\n - ToolCall → proceed with modified call\n - void/same call → proceed unchanged\n\nReference implementation in hooks-middleware-plan.md:137-163","status":"closed","priority":1,"issue_type":"task","created_at":"2026-01-23T08:27:52.047014+01:00","created_by":"ddebowczyk","updated_at":"2026-01-23T08:38:41.909401+01:00","closed_at":"2026-01-23T08:38:41.909401+01:00","close_reason":"Created BeforeToolMiddleware at packages/addons/src/Agent/Core/Middleware/BeforeToolMiddleware.php"}
225225
{"id":"instructor-php-c5d","title":"Implement retrospective demo","status":"closed","priority":2,"issue_type":"task","owner":"ddebowczyk@guidewire.com","created_at":"2026-02-19T22:33:02.917137+01:00","created_by":"Dariusz Debowczyk","updated_at":"2026-02-19T22:33:07.287333+01:00","closed_at":"2026-02-19T22:33:07.287333+01:00","close_reason":"Demo complete"}
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
<?php declare(strict_types=1);
2+
3+
namespace Cognesy\Agents\Tests\Unit\Agent;
4+
5+
use Cognesy\Agents\Builder\AgentBuilder;
6+
use Cognesy\Agents\Capability\Core\UseDriver;
7+
use Cognesy\Agents\Collections\Tools;
8+
use Cognesy\Agents\Continuation\AgentStopException;
9+
use Cognesy\Agents\Continuation\StopReason;
10+
use Cognesy\Agents\Continuation\StopSignal;
11+
use Cognesy\Agents\Data\AgentState;
12+
use Cognesy\Agents\Data\AgentStep;
13+
use Cognesy\Agents\Drivers\CanUseTools;
14+
use Cognesy\Agents\Drivers\Testing\FakeAgentDriver;
15+
use Cognesy\Agents\Drivers\Testing\ScenarioStep;
16+
use Cognesy\Agents\Enums\ExecutionStatus;
17+
use Cognesy\Agents\Events\AgentExecutionCompleted;
18+
use Cognesy\Agents\Events\AgentExecutionFailed;
19+
use Cognesy\Agents\Events\AgentExecutionStopped;
20+
use Cognesy\Agents\Events\ContinuationEvaluated;
21+
use Cognesy\Agents\Tool\Contracts\CanExecuteToolCalls;
22+
use Cognesy\Messages\Messages;
23+
24+
describe('Agent event payload regression', function () {
25+
it('emits sequential continuation payloads and completed execution status', function () {
    // Scripted two-step scenario: a 'noop' tool call (executeTools disabled),
    // then a final response.
    $scriptedDriver = FakeAgentDriver::fromSteps(
        ScenarioStep::toolCall('noop', executeTools: false),
        ScenarioStep::final('step two'),
    );

    // Collect every event passed to the wiretap, in the order received.
    $captured = [];
    $recorder = static function (object $event) use (&$captured): void {
        $captured[] = $event;
    };

    $agent = AgentBuilder::base()
        ->withCapability(new UseDriver($scriptedDriver))
        ->build()
        ->wiretap($recorder);

    $initialState = AgentState::empty()->withMessages(Messages::fromString('ping'));
    $final = $agent->execute($initialState);

    // Partition the captured events by type while preserving their order.
    $continuations = [];
    $completed = [];
    foreach ($captured as $event) {
        if ($event instanceof ContinuationEvaluated) {
            $continuations[] = $event;
        }
        if ($event instanceof AgentExecutionCompleted) {
            $completed[] = $event;
        }
    }

    // One continuation evaluation per step, numbered 1 and 2.
    expect($continuations)->toHaveCount(2);
    $stepNumbers = array_map(
        static fn(ContinuationEvaluated $evaluated): int => $evaluated->stepNumber,
        $continuations,
    );
    expect($stepNumbers)->toBe([1, 2]);

    // The evaluation after the final step must report a stop.
    expect($continuations[1]->shouldStop())->toBeTrue();

    // Exactly one completion event, agreeing with the returned state.
    expect($completed)->toHaveCount(1);
    expect($completed[0]->status)->toBe(ExecutionStatus::Completed);
    expect($final->status())->toBe(ExecutionStatus::Completed);
});
60+
61+
it('emits stop payload with stop signal reason/source/message', function () {
    // Signal raised by the driver. Its reason/message/source are the exact values
    // asserted on the emitted events below, so the fixture reason has to be
    // StopRequested (it was previously built with StopReason::Completed, which
    // contradicted the assertions).
    $signal = new StopSignal(
        reason: StopReason::StopRequested,
        message: 'stop now',
        source: 'RegressionStop',
    );

    // Driver that aborts immediately by throwing AgentStopException with the signal.
    $driver = new class($signal) implements CanUseTools {
        public function __construct(private StopSignal $signal) {}

        public function useTools(AgentState $state, Tools $tools, CanExecuteToolCalls $executor): AgentState {
            $step = new AgentStep(inputMessages: $state->messages());
            throw new AgentStopException($this->signal, $step, source: 'RegressionStop');
        }
    };

    // Collect every event passed to the wiretap, in the order received.
    $events = [];
    $agent = AgentBuilder::base()
        ->withCapability(new UseDriver($driver))
        ->build()
        ->wiretap(static function (object $event) use (&$events): void {
            $events[] = $event;
        });

    $final = $agent->execute(
        AgentState::empty()->withMessages(Messages::fromString('ping'))
    );

    // First captured event of each type; current() yields false when none was
    // captured, which the toBeInstanceOf() expectations below then reject.
    $continuation = current(array_values(array_filter(
        $events,
        static fn(object $event): bool => $event instanceof ContinuationEvaluated,
    )));
    $stopped = current(array_values(array_filter(
        $events,
        static fn(object $event): bool => $event instanceof AgentExecutionStopped,
    )));
    $completed = current(array_values(array_filter(
        $events,
        static fn(object $event): bool => $event instanceof AgentExecutionCompleted,
    )));

    // Continuation payload carries the stop signal fields.
    expect($continuation)->toBeInstanceOf(ContinuationEvaluated::class);
    // NOTE(review): the happy-path case in this file observes step numbers [1, 2];
    // confirm that 0 is the intended value when the driver stops during its first step.
    expect($continuation->stepNumber)->toBe(0);
    expect($continuation->stopSignal())->not->toBeNull();
    expect($continuation->stopSignal()?->reason)->toBe(StopReason::StopRequested);
    expect($continuation->stopSignal()?->source)->toBe('RegressionStop');
    expect($continuation->stopSignal()?->message)->toBe('stop now');

    // Stopped event exposes the same reason/message/source.
    expect($stopped)->toBeInstanceOf(AgentExecutionStopped::class);
    expect($stopped->stopReason)->toBe(StopReason::StopRequested);
    expect($stopped->stopMessage)->toBe('stop now');
    expect($stopped->source)->toBe('RegressionStop');

    // A completion event is still emitted, with Stopped status matching the final state.
    expect($completed)->toBeInstanceOf(AgentExecutionCompleted::class);
    expect($completed->status)->toBe(ExecutionStatus::Stopped);
    expect($final->status())->toBe(ExecutionStatus::Stopped);
});
116+
117+
it('emits executionFailed with exception payload on hard error', function () {
    // Driver whose only behaviour is to throw a plain RuntimeException.
    $failingDriver = new class implements CanUseTools {
        public function useTools(AgentState $state, Tools $tools, CanExecuteToolCalls $executor): AgentState {
            throw new \RuntimeException('boom regression');
        }
    };

    // Collect every event passed to the wiretap, in the order received.
    $captured = [];
    $recorder = static function (object $event) use (&$captured): void {
        $captured[] = $event;
    };

    $agent = AgentBuilder::base()
        ->withCapability(new UseDriver($failingDriver))
        ->build()
        ->wiretap($recorder);

    $initialState = AgentState::empty()->withMessages(Messages::fromString('ping'));
    $final = $agent->execute($initialState);

    // First failure event captured; stays false when none was emitted so the
    // instance expectation below rejects it.
    $failed = false;
    foreach ($captured as $event) {
        if ($event instanceof AgentExecutionFailed) {
            $failed = $event;
            break;
        }
    }

    // The failure event carries the original exception and a Failed status,
    // and the returned state reports the same status.
    expect($failed)->toBeInstanceOf(AgentExecutionFailed::class);
    expect($failed->exception->getMessage())->toBe('boom regression');
    expect($failed->exception)->toBeInstanceOf(\RuntimeException::class);
    expect($failed->status)->toBe(ExecutionStatus::Failed);
    expect($final->status())->toBe(ExecutionStatus::Failed);
});
147+
});

0 commit comments

Comments
 (0)