Skip to content

Commit d6027d6

Browse files
authored
Merge pull request #487 from Leward/assert-leak
Fix assertion state leak across dataset items in evaluations
2 parents 9a4f633 + 26a994f commit d6027d6

File tree

3 files changed

+84
-0
lines changed

3 files changed

+84
-0
lines changed

src/Evaluation/BaseEvaluator.php

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,15 @@ public function getAssertionFailures(): array
8787
{
8888
return $this->ruleExecutor->getFailures();
8989
}
90+
91+
/**
 * Discard the assertion state accumulated so far.
 *
 * Delegates to the rule executor's reset(), which drops the assertion
 * counters and recorded failures. Intended to be invoked before each
 * dataset item is processed so one item's assertions are never
 * reported as part of another item's result.
 */
public function resetAssertionState(): void
{
    $this->ruleExecutor->reset();
}
90101
}

src/Evaluation/Runner/EvaluatorRunner.php

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ public function run(BaseEvaluator $evaluator): EvaluatorSummary
2121
$totalTime = 0.0;
2222

2323
foreach ($data as $index => $item) {
24+
// Reset assertion state before each dataset item to ensure isolation
25+
$evaluator->resetAssertionState();
26+
2427
$startTime = microtime(true);
2528
$error = null;
2629
$output = null;
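tests/Evaluation/Runner/EvaluatorRunnerTest.php (new file; path inferred from the class namespace — confirm against the repository)

Lines changed: 70 additions & 0 deletions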
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace NeuronAI\Tests\Evaluation\Runner;
6+
7+
use NeuronAI\Evaluation\Assertions\StringContains;
8+
use NeuronAI\Evaluation\BaseEvaluator;
9+
use NeuronAI\Evaluation\Contracts\DatasetInterface;
10+
use NeuronAI\Evaluation\Dataset\ArrayDataset;
11+
use NeuronAI\Evaluation\Runner\EvaluatorRunner;
12+
use PHPUnit\Framework\TestCase;
13+
14+
class EvaluatorRunnerTest extends TestCase
{
    /**
     * Runs a two-item dataset whose first item fails its single assertion
     * and whose second item passes, then verifies that every result only
     * reports its own assertion counts.
     *
     * Counters are compared with assertSame() so that a loosely-equal value
     * (e.g. '1', 1.0 or true) cannot satisfy an expected integer count.
     */
    public function testAssertionStateDoesNotLeakBetweenDatasetItems(): void
    {
        $evaluator = new StringContainsEvaluator();
        $runner = new EvaluatorRunner();

        $summary = $runner->run($evaluator);

        $results = $summary->getResults();
        $this->assertCount(2, $results);

        // First item: 'hello' does not contain 'world', so the assertion fails.
        $result0 = $results[0];
        $this->assertFalse($result0->isPassed());
        $this->assertSame(0, $result0->getAssertionsPassed());
        $this->assertSame(1, $result0->getAssertionsFailed());
        $this->assertSame(1, $result0->getTotalAssertions());

        // Second item: passing assertion (must not inherit the first item's failure).
        $result1 = $results[1];
        $this->assertTrue($result1->isPassed());
        $this->assertSame(1, $result1->getAssertionsPassed());
        $this->assertSame(0, $result1->getAssertionsFailed());
        $this->assertSame(1, $result1->getTotalAssertions());

        // Summary: exactly 2 assertions in total (one per dataset item).
        $this->assertSame(2, $summary->getTotalAssertions());
        $this->assertSame(1, $summary->getTotalAssertionsPassed());
        $this->assertSame(1, $summary->getTotalAssertionsFailed());
    }
}
47+
48+
/**
 * Evaluator fixture used by the runner test.
 *
 * Each dataset item carries an 'actual' string, returned unchanged as the
 * run output, and an 'expected' substring that the output is asserted to
 * contain. The first item is built to fail and the second to pass.
 */
class StringContainsEvaluator extends BaseEvaluator
{
    /**
     * Provide the two fixed dataset items: one failing, one passing.
     */
    public function getDataset(): DatasetInterface
    {
        $items = [
            ['actual' => 'hello', 'expected' => 'world'],
            ['actual' => 'hello world', 'expected' => 'world'],
        ];

        return new ArrayDataset($items);
    }

    /**
     * Return the item's 'actual' value as the output under evaluation.
     */
    public function run(array $datasetItem): mixed
    {
        $actual = $datasetItem['actual'];

        return $actual;
    }

    /**
     * Assert that the output contains the item's 'expected' substring.
     */
    public function evaluate(mixed $output, array $datasetItem): void
    {
        $needle = $datasetItem['expected'];
        $rule = new StringContains($needle);

        $this->assert($rule, $output);
    }
}

0 commit comments

Comments
 (0)