Skip to content

Commit 02a98fe

Browse files
author
Pietro Albini
committed
Make backlog processing instantaneous
This commit removes all the code that manually skipped the backlog, since I discovered that if you pass a negative offset to `getUpdates`, all the old messages are automatically discarded. Thanks to this new approach, skipping the backlog is now instantaneous.
1 parent 6f7c517 commit 02a98fe

File tree

3 files changed

+24
-78
lines changed

3 files changed

+24
-78
lines changed

botogram/runner/processes.py

Lines changed: 12 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ def loop(self):
186186
return
187187

188188
try:
189-
updates, backlog = self.fetcher.fetch()
189+
updates = self.fetcher.fetch()
190190
except updates_module.AnotherInstanceRunningError:
191191
self.handle_another_instance()
192192
return
@@ -196,26 +196,17 @@ def loop(self):
196196
self.logger.debug("Exception content: %s" % str(e))
197197
return
198198

199-
if backlog:
200-
if len(backlog) == 1:
201-
self.logger.debug("Skipped update #%s because it's coming "
202-
"from the backlog" % backlog[0].update_id)
203-
else:
204-
self.logger.debug("Skipped updates #%s to #%s because they're "
205-
"coming from the backlog" % (
206-
backlog[0].update_id,
207-
backlog[-1].update_id
208-
))
209-
210-
if updates:
211-
result = []
212-
for update in updates:
213-
update.set_api(None)
214-
result.append(jobs.Job(self.bot_id, jobs.process_update, {
215-
"update": update,
216-
}))
217-
218-
self.ipc.command("jobs.bulk_put", result)
199+
if not updates:
200+
return
201+
202+
result = []
203+
for update in updates:
204+
update.set_api(None)
205+
result.append(jobs.Job(self.bot_id, jobs.process_update, {
206+
"update": update,
207+
}))
208+
209+
self.ipc.command("jobs.bulk_put", result)
219210

220211
def handle_another_instance(self):
221212
"""Code run when another instance of the bot is running"""

botogram/updates.py

Lines changed: 10 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66
Released under the MIT license
77
"""
88

9-
import time
10-
119
from . import objects
1210
from . import api
1311

@@ -31,7 +29,6 @@ class UpdatesFetcher:
3129
def __init__(self, bot):
3230
self._bot = bot
3331
self._last_id = -1
34-
self._started_at = time.time()
3532
self._backlog_processed = False
3633

3734
# Don't treat backlog as backlog if bot.process_backlog is True
@@ -55,6 +52,15 @@ def _fetch_updates(self, timeout):
5552

5653
def fetch(self, timeout=1):
5754
"""Fetch the latest updates"""
55+
if not self._backlog_processed:
56+
# Just erase all the previous messages
57+
self._bot.api.call("getUpdates", {
58+
"offset": -1,
59+
"timeout": 0,
60+
}, expect=objects.Updates)
61+
62+
self._backlog_processed = True
63+
5864
updates = self._fetch_updates(timeout)
5965

6066
# If there are no updates just ignore this block
@@ -63,59 +69,7 @@ def fetch(self, timeout=1):
6369
except IndexError:
6470
pass
6571

66-
if self._backlog_processed:
67-
return updates, []
68-
69-
# Now start to filter backlog from messages to process
70-
# This is faster than a plain check for every item with a for, and
71-
# helps a lot if you have a *really* long backlog
72-
73-
if not updates:
74-
self._backlog_processed = True
75-
return [], []
76-
77-
to_check = len(updates) - 1
78-
check_chunk = len(updates)
79-
direction = -1 # -1 is backward, 1 is forward
80-
last = 0
81-
82-
while True:
83-
# Check if the current message to check is from the backlog
84-
# Adjust the check direction accordingly
85-
update = updates[to_check]
86-
if update.message.date < self._started_at:
87-
direction = 1
88-
else:
89-
direction = -1
90-
91-
# The next chunk to check is exactly half of the previous one
92-
check_chunk = int(check_chunk / 2)
93-
if check_chunk == 0:
94-
check_chunk = 1
95-
96-
# If the last one was from the backlog, the current one is not and
97-
# this is at the max precision (step of 1), then consider it as
98-
# found
99-
if direction == 1 and last == -1 and check_chunk == 1:
100-
to_check += 1 # Return to the previous state
101-
self._backlog_processed = True
102-
break
103-
last = direction
104-
105-
# Set the next update to check
106-
to_check += direction * check_chunk
107-
108-
# If the next update to check is outside the bounds of the list
109-
# just break the loop (and say the backlog is processed if no
110-
# updates are from it)
111-
if to_check < 0:
112-
self._backlog_processed = True
113-
break
114-
elif to_check >= len(updates):
115-
break
116-
117-
# The first is the updates to process, the second the backlog
118-
return updates[to_check:], updates[:to_check]
72+
return updates
11973

12074
def block_until_alone(self, treshold=4, check_timeout=1, when_stop=None):
12175
"""Returns when this one is the only instance of the bot"""

docs/changelog.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,8 @@ Changes
6565
Performance improvements
6666
------------------------
6767

68-
* Updates initial filtering and queueing performance improved
68+
* Updates queueing performance improved
69+
* Backlog processing is now instantaneous
6970

7071
Deprecated features
7172
-------------------

0 commit comments

Comments
 (0)