Skip to content

Commit 6fb4c8d

Browse files
committed
fix: update average interval calculation to use filtered count and add related tests
1 parent 9e751a1 commit 6fb4c8d

File tree

2 files changed

+87
-1
lines changed

2 files changed

+87
-1
lines changed

src/FeedIo/Reader/Result/UpdateStats.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ public function getAverageInterval(): int
155155

156156
$total = array_sum($result);
157157

158-
return count($this->intervals) ? intval(floor($total / count($this->intervals))) : 0;
158+
return count($result) ? intval(floor($total / count($result))) : 0;
159159
}
160160

161161
/**

tests/FeedIo/Reader/Result/UpdateStatsTest.php

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,92 @@ public function testBothImplementationsAreSafe()
340340
}
341341
}
342342

343+
/**
344+
* Test that average is calculated using filtered count, not original count
345+
* This ensures the average is based only on non-outlier values
346+
*/
347+
public function testAverageIntervalUsesFilteredCount()
348+
{
349+
$feed = new Feed();
350+
$feed->setLastModified(new \DateTime('-1 day'));
351+
352+
// Create a dataset with clear outliers
353+
// Regular intervals: ~1 day (86400 seconds)
354+
// Outlier: ~100 days (8640000 seconds)
355+
$dates = [
356+
'-1 day', // Most recent
357+
'-2 days', // 1 day interval
358+
'-3 days', // 1 day interval
359+
'-4 days', // 1 day interval
360+
'-5 days', // 1 day interval
361+
'-105 days', // 100 day interval (OUTLIER - should be filtered out)
362+
];
363+
364+
foreach ($dates as $date) {
365+
$item = new Feed\Item();
366+
$item->setLastModified(new \DateTime($date));
367+
$feed->add($item);
368+
}
369+
370+
$stats = new UpdateStats($feed);
371+
$intervals = $stats->getIntervals();
372+
373+
// We should have 5 intervals total
374+
$this->assertCount(5, $intervals);
375+
376+
// Get the average
377+
$average = $stats->getAverageInterval();
378+
379+
// The average should be close to 86400 (1 day)
380+
// If it was incorrectly dividing by the original count (5),
381+
// it would be lower because the large outlier is already excluded from
382+
// the sum while the divisor still counts all 5 intervals.
383+
384+
// With outlier filtering:
385+
// - Filtered values: [86400, 86400, 86400, 86400] (4 values)
386+
// - Sum: 345600
387+
// - Average: 345600 / 4 = 86400
388+
389+
// With the old divisor (pre-fix bug):
390+
// - Sum of filtered: 345600
391+
// - Divided by original count: 345600 / 5 = 69120 (WRONG!)
392+
393+
$this->assertEquals(86400, $average, 'Average should be 86400 (1 day) when calculated with filtered count');
394+
395+
// Verify it's not the incorrect value
396+
$this->assertNotEquals(69120, $average, 'Average should not use original count as divisor');
397+
}
398+
399+
/**
400+
* Test average calculation with all values filtered out
401+
* Edge case: what if IQR filtering removes everything?
402+
*/
403+
public function testAverageIntervalWhenAllValuesFiltered()
404+
{
405+
$feed = new Feed();
406+
$feed->setLastModified(new \DateTime('-1 day'));
407+
408+
// Only two items (a single interval): a minimal dataset for which the
409+
// filter may leave nothing to average. Pathological, but we should handle it gracefully
410+
$dates = [
411+
'-1 day',
412+
'-2 days',
413+
];
414+
415+
foreach ($dates as $date) {
416+
$item = new Feed\Item();
417+
$item->setLastModified(new \DateTime($date));
418+
$feed->add($item);
419+
}
420+
421+
$stats = new UpdateStats($feed);
422+
423+
// Should return a non-negative int (0 if no values pass the filter), not crash
424+
$average = $stats->getAverageInterval();
425+
$this->assertIsInt($average);
426+
$this->assertGreaterThanOrEqual(0, $average);
427+
}
428+
343429
private function getDates(): array
344430
{
345431
return [

0 commit comments

Comments
 (0)