@@ -31,14 +31,23 @@ class UpdateStats
3131
3232 protected array $ intervals = [];
3333
34+ protected int $ newestItemDate = 0 ;
35+
/**
 * UpdateStats constructor.
 *
 * Extracts the item dates from the feed, records the reference date used
 * by the scheduling methods and computes the intervals between items.
 *
 * @param FeedInterface $feed feed to compute the statistics for
 */
public function __construct(
    protected FeedInterface $feed
) {
    $dates = $this->extractDates($feed);

    // Use the most recent item date, capped at the current time so that
    // items dated in the future are ignored; without any item date fall
    // back to the timestamp of the feed itself.
    $this->newestItemDate = count($dates) === 0
        ? $this->getFeedTimestamp()
        : min(max($dates), time());

    $this->intervals = $this->computeIntervals($dates);
}
4352
4453 /**
@@ -57,7 +66,6 @@ public function computeNextUpdate(
5766 if ($ this ->isSleepy ($ sleepyDuration , $ marginRatio )) {
5867 return (new \DateTime ())->setTimestamp (time () + $ sleepyDelay );
5968 }
60- $ feedTimeStamp = $ this ->getFeedTimestamp ();
6169 $ now = time ();
6270 $ intervals = [
6371 $ this ->getAverageInterval (),
@@ -66,7 +74,7 @@ public function computeNextUpdate(
6674 sort ($ intervals );
6775 $ newTimestamp = $ now + $ minDelay ;
6876 foreach ($ intervals as $ interval ) {
69- $ computedTimestamp = $ this ->addInterval ($ feedTimeStamp , $ interval , $ marginRatio );
77+ $ computedTimestamp = $ this ->addInterval ($ this -> newestItemDate , $ interval , $ marginRatio );
7078 if ($ computedTimestamp > $ now ) {
7179 $ newTimestamp = $ computedTimestamp ;
7280 break ;
@@ -82,7 +90,7 @@ public function computeNextUpdate(
8290 */
public function isSleepy(int $sleepyDuration, float $marginRatio): bool
{
    // The current time is read first, then compared with the limit derived
    // from the newest item date.
    // NOTE(review): addInterval() presumably returns the newest item date
    // plus the duration adjusted by the margin ratio - confirm.
    $now = time();
    $sleepyLimit = $this->addInterval($this->newestItemDate, $sleepyDuration, $marginRatio);

    return $now > $sleepyLimit;
}
8795
8896 /**
@@ -125,7 +133,27 @@ public function getMaxInterval(): int
125133 */
public function getAverageInterval(): int
{
    // The quartiles below are read by position, so the intervals must be
    // sorted first (the property itself is sorted in place).
    sort($this->intervals);

    $count = count($this->intervals);
    if ($count === 0) {
        return 0;
    }

    // Some feeds contain very old historic articles; discard the resulting
    // outliers with the interquartile-range rule before averaging.
    // floor() returns a float, so cast it before using it as an offset.
    $q1 = $this->intervals[(int) floor($count * 0.25)];
    $q3 = $this->intervals[(int) floor($count * 0.75)];
    $iqr = $q3 - $q1;

    $lowerBound = $q1 - 1.5 * $iqr;
    $upperBound = $q3 + 1.5 * $iqr;

    $kept = array_filter(
        $this->intervals,
        static fn ($value) => $value >= $lowerBound && $value <= $upperBound
    );

    // Divide by the number of intervals that survived the filter, not by
    // the original count, otherwise every removed outlier drags the
    // average down.
    $keptCount = count($kept);
    if ($keptCount === 0) {
        return 0;
    }

    return intval(floor(array_sum($kept) / $keptCount));
}
@@ -136,9 +164,27 @@ public function getAverageInterval(): int
public function getMedianInterval(): int
{
    // The median is read by position, so sort the intervals in place first.
    sort($this->intervals);

    $size = count($this->intervals);
    if ($size === 0) {
        return 0;
    }

    $middle = intdiv($size, 2);

    // Odd number of intervals: the middle element is the median.
    if ($size % 2 !== 0) {
        return $this->intervals[$middle];
    }

    // Even number of intervals: mean of the two central elements, rounded down.
    $lower = $this->intervals[$middle - 1];
    $upper = $this->intervals[$middle];

    return intval(floor(($lower + $upper) / 2));
}
181+
/**
 * Returns the reference date stored by the constructor: the most recent
 * item date that is not in the future, or the feed timestamp when the
 * feed has no item dates.
 *
 * @return int timestamp comparable with time()
 */
public function getNewestItemDate(): int
{
    return $this->newestItemDate;
}
143189
144190 private function computeIntervals (array $ dates ): array
0 commit comments