diff --git a/bin/feedio b/bin/feedio
index 497def5b..f9178645 100755
--- a/bin/feedio
+++ b/bin/feedio
@@ -49,7 +49,7 @@ function read(FeedIo $feedIo, string $url)
 
     $updateStats = $result->getUpdateStats();
 
-    echo "\033[32mMinimum interval between items : \033[34m".formatDateInterval($updateStats->getMedianInterval())."\033[0m" . PHP_EOL;
+    echo "\033[32mMinimum interval between items : \033[34m".formatDateInterval($updateStats->getMinInterval())."\033[0m" . PHP_EOL;
     echo "\033[32mMedian interval : \033[34m".formatDateInterval($updateStats->getMedianInterval())."\033[0m" . PHP_EOL;
     echo "\033[32mAverage interval : \033[34m".formatDateInterval($updateStats->getAverageInterval())."\033[0m" . PHP_EOL;
     echo "\033[32mMaximum interval : \033[34m".formatDateInterval($updateStats->getMaxInterval())."\033[0m". PHP_EOL;
diff --git a/src/FeedIo/Reader/Result/UpdateStats.php b/src/FeedIo/Reader/Result/UpdateStats.php
index a52b0781..bd74a0f4 100644
--- a/src/FeedIo/Reader/Result/UpdateStats.php
+++ b/src/FeedIo/Reader/Result/UpdateStats.php
@@ -31,6 +31,8 @@ class UpdateStats
 
     protected array $intervals = [];
 
+    protected int $newestItemDate = 0;
+
     /**
      * UpdateStats constructor.
      * @param FeedInterface $feed
@@ -38,7 +40,13 @@ class UpdateStats
     public function __construct(
         protected FeedInterface $feed
     ) {
-        $this->intervals = $this->computeIntervals($this->extractDates($feed));
+        $dates = $this->extractDates($feed);
+        if (count($dates) > 0) {
+            $this->newestItemDate = min(max($dates), time());
+        } else {
+            $this->newestItemDate = $this->getFeedTimestamp();
+        }
+        $this->intervals = $this->computeIntervals($dates);
     }
 
     /**
@@ -57,7 +65,6 @@ public function computeNextUpdate(
         if ($this->isSleepy($sleepyDuration, $marginRatio)) {
             return (new \DateTime())->setTimestamp(time() + $sleepyDelay);
         }
-        $feedTimeStamp = $this->getFeedTimestamp();
         $now = time();
         $intervals = [
             $this->getAverageInterval(),
@@ -66,7 +73,7 @@ public function computeNextUpdate(
         sort($intervals);
         $newTimestamp = $now + $minDelay;
         foreach ($intervals as $interval) {
-            $computedTimestamp = $this->addInterval($feedTimeStamp, $interval, $marginRatio);
+            $computedTimestamp = $this->addInterval($this->newestItemDate, $interval, $marginRatio);
             if ($computedTimestamp > $now) {
                 $newTimestamp = $computedTimestamp;
                 break;
@@ -82,7 +89,7 @@ public function computeNextUpdate(
      */
     public function isSleepy(int $sleepyDuration, float $marginRatio): bool
     {
-        return time() > $this->addInterval($this->getFeedTimestamp(), $sleepyDuration, $marginRatio);
+        return time() > $this->addInterval($this->newestItemDate, $sleepyDuration, $marginRatio);
     }
 
     /**
@@ -125,7 +132,27 @@ public function getMaxInterval(): int
      */
     public function getAverageInterval(): int
     {
-        $total = array_sum($this->intervals);
+        sort($this->intervals);
+
+        $count = count($this->intervals);
+        if ($count === 0) {
+            return 0;
+        }
+
+        // Some feeds carry very old historic articles: discard such outliers
+        // with the 1.5 * IQR rule, keeping [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR].
+        $q1 = $this->intervals[(int) floor($count * 0.25)];
+        $q3 = $this->intervals[(int) floor($count * 0.75)];
+        $iqr = $q3 - $q1;
+
+        $lower_bound = $q1 - 1.5 * $iqr;
+        $upper_bound = $q3 + 1.5 * $iqr;
+
+        $result = array_filter($this->intervals, function($value) use ($lower_bound, $upper_bound) {
+            return $value >= $lower_bound && $value <= $upper_bound;
+        });
+        // Average the kept values only; $result always contains at least Q1 and Q3.
+        $total = array_sum($result);
 
-        return count($this->intervals) ? intval(floor($total / count($this->intervals))) : 0;
+        return intval(floor($total / count($result)));
     }
 
@@ -136,9 +163,28 @@ public function getAverageInterval(): int
     public function getMedianInterval(): int
     {
         sort($this->intervals);
-        $num = floor(count($this->intervals) / 2);
 
-        return isset($this->intervals[$num]) ? $this->intervals[$num] : 0;
+        $count = count($this->intervals);
+        if ($count === 0) {
+            return 0;
+        }
+
+        $num = intdiv($count, 2);
+
+        if ($count % 2 === 0) { // even count: floored mean of the two middle values
+            return intval(floor(($this->intervals[$num - 1] + $this->intervals[$num]) / 2));
+        } else {
+            return $this->intervals[$num];
+        }
+    }
+
+    /**
+     * Newest date extracted from the feed, capped at construction time; the feed timestamp when no dates were extracted.
+     * @return int
+     */
+    public function getNewestItemDate(): int
+    {
+        return $this->newestItemDate;
     }
 
     private function computeIntervals(array $dates): array
diff --git a/tests/FeedIo/Reader/Result/UpdateStatsTest.php b/tests/FeedIo/Reader/Result/UpdateStatsTest.php
index 76819c3b..51635eca 100644
--- a/tests/FeedIo/Reader/Result/UpdateStatsTest.php
+++ b/tests/FeedIo/Reader/Result/UpdateStatsTest.php
@@ -35,7 +35,9 @@ public function testIntervals()
         $this->assertEquals(86400, $stats->getMinInterval());
         $nextUpdate = $stats->computeNextUpdate();
         $averageInterval = $stats->getAverageInterval();
-        $this->assertEquals($feed->getLastModified()->getTimestamp() + intval($averageInterval + 0.1 * $averageInterval), $nextUpdate->getTimestamp());
+        $medianInterval = $stats->getMedianInterval();
+        $computedInterval = min($medianInterval, $averageInterval);
+        $this->assertEquals($stats->getNewestItemDate() + intval($computedInterval + 0.1 * $computedInterval), $nextUpdate->getTimestamp());
     }
 
     public function testSleepyFeed()