@@ -282,20 +282,61 @@ class ChangeWatcher(object):
282282 "hub_config" : "config" ,
283283 }
284284
# Upper bound on the number of events handled in one publish cycle. The
# limit counts the first event the publisher waits for as well as the events
# it drains from the queue afterwards.
#
# When the hub is under heavy load (large builds, many workers) the event
# queue can fill much faster than individual broadcasts can be sent, starving
# the Tornado IOLoop and preventing HTTP responses and WebSocket heartbeats
# from being processed. Draining in bounded batches and deduplicating the
# events in each batch keeps the loop responsive.
PUBLISH_BATCH_SIZE = 250
291+
@classmethod
def publish(cls):
    """Start the background task that broadcasts queued events to listeners.

    Sets ``cls.do_publish`` to True and schedules a coroutine which, for as
    long as that flag is true (it is re-checked once per batch), repeatedly:

    1. waits for one event on ``cls.event_queue``;
    2. drains further already-queued events without waiting, up to
       ``cls.PUBLISH_BATCH_SIZE`` events in total;
    3. collapses events that share the same ``(obj, _id)`` pair into the
       most recent one, which takes the position of the first occurrence;
    4. passes every remaining event to ``listener.read()`` for each
       listener in ``cls.listeners``, logging (not propagating) errors
       raised by a listener;
    5. yields to the event loop with ``asyncio.sleep(0)``.

    Events that cannot be keyed -- no ``get`` method, a missing/falsy
    ``obj`` or ``_id``, or an unhashable ``_id`` -- are never merged and are
    forwarded unchanged, so an unexpected event shape cannot terminate the
    publishing task.

    Returns:
        The future returned by ``asyncio.ensure_future`` for the
        publishing coroutine.
    """
    cls.do_publish = True

    def dedup_key(event):
        """Return a hashable ``(obj, _id)`` key, or None if not mergeable."""
        try:
            obj, _id = event.get("obj"), event.get("_id")
            if not (obj and _id):
                return None
            key = (obj, _id)
            # Fail here, not later in the dict lookup, if a part is unhashable.
            hash(key)
        except (AttributeError, TypeError):
            # Not a mapping-like event, or an unhashable field: forward as-is.
            return None
        return key

    def coalesce(batch):
        """Keep only the latest event per key, in first-occurrence order."""
        slot_of = {}
        kept = []
        for event in batch:
            key = dedup_key(event)
            if key is None:
                kept.append(event)
            elif key in slot_of:
                # A newer event for the same document replaces the older one.
                kept[slot_of[key]] = event
            else:
                slot_of[key] = len(kept)
                kept.append(event)
        return kept

    async def do():
        while cls.do_publish:
            # Block until at least one event is available.
            batch = [await cls.event_queue.get()]

            # Take whatever else is already queued, up to the batch limit,
            # so duplicates can be merged and fewer broadcasts are sent.
            while len(batch) < cls.PUBLISH_BATCH_SIZE:
                try:
                    batch.append(cls.event_queue.get_nowait())
                except asyncio.QueueEmpty:
                    break

            for event in coalesce(batch):
                for listener in cls.listeners:
                    try:
                        listener.read(event)
                    except Exception as e:
                        # One failing listener must not stop the others.
                        logging.error("Can't publish %s to %s: %s", event, listener, e)

            # Give other tasks on the event loop a chance to run between
            # batches.
            await asyncio.sleep(0)

    return asyncio.ensure_future(do())
301342
0 commit comments