Skip to content

Commit 583be7c

Browse files
RazvanLiviuVarzarufauust
authored and committed
Make Zabbix Calls Non-Blocking
`Background` The Zabbix Python module performs synchronous API calls, which block the reactor’s event loop and can potentially cause the master to freeze. Even worse, the configured timeout can block execution for up to 10 seconds. After reviewing the official integration documentation (https://www.zabbix.com/integrations/python), I concluded that no Twisted-compatible module exists that allows asynchronous API calls. Writing such a module from scratch would be excessive for such a small component. Since no Twisted integration is available, I also see no benefit in switching to a different Zabbix module; the current one has served us well. `Changes` The solution is to run the synchronous code in a separate thread by passing `getMetric` to `deferToThread`. This prevents the main thread from being blocked. The critical section is wrapped in a `try/except` block to ensure that any failure inside `getMetric`—for example, Zabbix unavailability, missing metrics, or network issues—does not prevent the build from starting. I also reduced the `timeout` to 3 seconds. Although the main thread is no longer blocked, Buildbot’s `BuildRequestDistributor` will not proceed to the next builder’s `build request` until `canStartBuild` has completed. Reducing the `timeout` prevents unnecessary delays in processing the build request queue.
1 parent a06600c commit 583be7c

File tree

1 file changed

+47
-10
lines changed

1 file changed

+47
-10
lines changed

utils.py

Lines changed: 47 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@
22
import os
33
import re
44
from datetime import datetime
5-
from typing import Any, Tuple
5+
from typing import Any, Generator, Tuple
66

77
import docker
88
from pyzabbix import ZabbixAPI
9+
from twisted.internet import defer, threads
10+
from twisted.python import log
911

1012
from buildbot.buildrequest import BuildRequest
1113
from buildbot.interfaces import IProperties
@@ -289,18 +291,32 @@ def build_request_sort_key(request: BuildRequest):
289291
return min(requests, key=build_request_sort_key)
290292

291293

294+
@defer.inlineCallbacks
292295
def canStartBuild(
293296
builder: Builder, wfb: AbstractWorkerForBuilder, request: BuildRequest
294-
) -> bool:
297+
) -> Generator[defer.Deferred, None, bool]:
295298
worker: AbstractWorker = wfb.worker
296299
if "s390x" not in worker.name:
297300
return True
298301

299302
worker_prefix = "-".join(worker.name.split("-")[0:2])
300303
worker_name = private_config["private"]["worker_name_mapping"][worker_prefix]
301-
# TODO(cvicentiu) this could be done with a yield to not have the master
302-
# stuck until the network operation is completed.
303-
load = getMetric(worker_name, "BB_accept_new_build")
304+
305+
try:
306+
load = yield threads.deferToThread(
307+
getMetric, worker_name, "BB_accept_new_build"
308+
)
309+
except (ZabbixNoHostFound, ZabbixToManyItems, ZabbixNoItemFound) as e:
310+
log.err(e, f"Zabbix Error: Check configuration for {worker_name}")
311+
return True # This is clearly a Zabbix misconfiguration, let the build start
312+
except ZabbixTooOldData as e:
313+
log.err(e, f"Zabbix Error: Too old Zabbix data for worker {worker_name}")
314+
return False
315+
except Exception as e:
316+
log.err(
317+
e, f"Zabbix Error: Unexpected error when fetching data for {worker_name}"
318+
)
319+
return True # In case of other errors, e.g. network issues, let the build start
304320

305321
if float(load) > 60:
306322
worker.quarantine_timeout = 60
@@ -575,12 +591,28 @@ def prioritizeBuilders(
575591
return builders
576592

577593

594+
class ZabbixTooOldData(Exception):
595+
pass
596+
597+
598+
class ZabbixToManyItems(Exception):
599+
pass
600+
601+
602+
class ZabbixNoItemFound(Exception):
603+
pass
604+
605+
606+
class ZabbixNoHostFound(Exception):
607+
pass
608+
609+
578610
# Zabbix helper
579611
def getMetric(hostname: str, metric: str) -> Any:
580612
# set API
581613
zapi = ZabbixAPI(private_config["private"]["zabbix_server"])
582614
zapi.session.verify = True
583-
zapi.timeout = 10
615+
zapi.timeout = 3
584616

585617
zapi.login(api_token=private_config["private"]["zabbix_token"])
586618

@@ -590,20 +622,25 @@ def getMetric(hostname: str, metric: str) -> Any:
590622
host_id = h["hostid"]
591623
break
592624

593-
assert host_id is not None
625+
if host_id is None:
626+
raise ZabbixNoHostFound
594627

595628
hostitems = zapi.item.get(filter={"hostid": host_id, "name": metric})
596629

597-
assert len(hostitems) == 1
630+
if len(hostitems) > 1:
631+
raise ZabbixToManyItems
632+
if len(hostitems) == 0:
633+
raise ZabbixNoItemFound
634+
598635
hostitem = hostitems[0]
599636

600637
last_value = hostitem["lastvalue"]
601638
last_time = datetime.fromtimestamp(int(hostitem["lastclock"]))
602639

603640
elapsed_from_last = (datetime.now() - last_time).total_seconds()
604641

605-
# The latest data is no older than 80 seconds
606-
assert elapsed_from_last < 80
642+
if elapsed_from_last >= 80:
643+
raise ZabbixTooOldData
607644

608645
return last_value
609646

0 commit comments

Comments
 (0)