Skip to content

Commit 804eb2b

Browse files
committed
Enables the use of EPOLLEXCLUSIVE for UDP workers where supported.
Normal behaviour for multiple processes using EPOLL to listen on a single FD is for every process to be woken on every IO event. This can cause a thundering herd effect, increasing context switches and CPU usage. With EPOLLEXCLUSIVE, only a single UDP worker is woken to handle an IO request, greatly reducing context switching and contention, especially as the number of processes grows. One potential downside of using EPOLLEXCLUSIVE is that EPOLL may coalesce multiple events on a file descriptor into a single wakeup. This can increase latency if only a single process is woken to handle what may be multiple SIP messages. To balance latency against the reduced thundering herd, this patch makes the first worker for a socket not use EPOLLEXCLUSIVE, so it is ALWAYS woken for events. If present, at least one other worker using EPOLLEXCLUSIVE will also be woken.
1 parent 28fe667 commit 804eb2b

File tree

3 files changed

+27
-19
lines changed

3 files changed

+27
-19
lines changed

io_wait.h

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,8 @@ inline static int io_watch_add( io_wait_h* h, // lgtm [cpp/use-of-goto]
347347
void* data,
348348
int prio,
349349
unsigned int timeout,
350-
int flags)
350+
int flags,
351+
int exclusive)
351352
{
352353

353354
/* helper macros */
@@ -506,16 +507,9 @@ inline static int io_watch_add( io_wait_h* h, // lgtm [cpp/use-of-goto]
506507
ep_event.events|=EPOLLOUT;
507508
if (!already) {
508509
again1:
509-
#if 0
510-
/* This is currently broken, because when using EPOLLEXCLUSIVE, the OS will
511-
* send sequential events to the same process - thus our pseudo-dispatcher
512-
* will no longer work, since events on a pipe will be queued by a single
513-
* process. - razvanc
514-
*/
515-
#if (defined __OS_linux) && (__GLIBC__ >= 2) && (__GLIBC_MINOR__ >= 24)
516-
if (e->flags & IO_WATCH_READ)
510+
#ifdef EPOLLEXCLUSIVE
511+
if (e->flags & IO_WATCH_READ && exclusive == 1)
517512
ep_event.events|=EPOLLEXCLUSIVE;
518-
#endif
519513
#endif
520514
n=epoll_ctl(h->epfd, EPOLL_CTL_ADD, fd, &ep_event);
521515
if (n==-1){

net/net_udp.c

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ inline static int handle_io(struct fd_map* fm, int idx,int event_type)
308308
}
309309

310310

311-
int udp_proc_reactor_init( struct socket_info *si )
311+
int udp_proc_reactor_init( struct socket_info *si, int si_rank )
312312
{
313313

314314
/* create the reactor for UDP proc */
@@ -336,9 +336,18 @@ int udp_proc_reactor_init( struct socket_info *si )
336336
}
337337

338338
/* init: start watching the SIP UDP fd */
339-
if (reactor_add_reader( si->socket, F_UDP_READ, RCT_PRIO_NET, si)<0) {
340-
LM_CRIT("failed to add UDP listen socket to reactor\n");
341-
goto error;
339+
//First child per socket becomes 'Master', will wake on every event
340+
if (si_rank == 0) {
341+
if (reactor_add_reader( si->socket, F_UDP_READ, RCT_PRIO_NET, si)<0) {
342+
LM_CRIT("failed to add UDP listen socket to reactor\n");
343+
goto error;
344+
}
345+
} else {
346+
//Subsequent processes are helpers, only one should be woken to help at a time
347+
if (reactor_add_reader_exclusive( si->socket, F_UDP_READ, RCT_PRIO_NET, si)<0) {
348+
LM_CRIT("failed to add UDP listen socket to reactor\n");
349+
goto error;
350+
}
342351
}
343352

344353
return 0;
@@ -366,7 +375,9 @@ static int fork_dynamic_udp_process(void *si_filter)
366375
bind_address=si; /* shortcut */
367376
/* we first need to init the reactor to be able to add fd
368377
* into it in child_init routines */
369-
if (udp_proc_reactor_init(si) < 0 ||
378+
/* Since this is in addition to the master process, si_rank should be > 0 to enable
379+
* exclusive polling with EPOLL */
380+
if (udp_proc_reactor_init(si, 1) < 0 ||
370381
init_child(10000/*FIXME*/) < 0) {
371382
goto error;
372383
}
@@ -463,7 +474,7 @@ int udp_start_processes(int *chd_rank, int *startup_done)
463474
bind_address=si; /* shortcut */
464475
/* we first need to init the reactor to be able to add fd
465476
* into it in child_init routines */
466-
if (udp_proc_reactor_init(si) < 0 ||
477+
if (udp_proc_reactor_init(si, i) < 0 ||
467478
init_child(*chd_rank) < 0) {
468479
report_failure_status();
469480
if (*chd_rank == 1 && startup_done)

reactor_defs.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,13 +78,16 @@ int init_reactor_size(void);
7878
init_io_wait(&_worker_io, _name, reactor_size, io_poll_method, _prio_max)
7979

8080
#define reactor_add_reader( _fd, _type, _prio, _data) \
81-
io_watch_add(&_worker_io, _fd, _type, _data, _prio, 0, IO_WATCH_READ)
81+
io_watch_add(&_worker_io, _fd, _type, _data, _prio, 0, IO_WATCH_READ, 0)
82+
83+
#define reactor_add_reader_exclusive( _fd, _type, _prio, _data) \
84+
io_watch_add(&_worker_io, _fd, _type, _data, _prio, 0, IO_WATCH_READ, 1)
8285

8386
#define reactor_add_reader_with_timeout( _fd, _type, _prio, _t, _data) \
84-
io_watch_add(&_worker_io, _fd, _type, _data, _prio, _t, IO_WATCH_READ)
87+
io_watch_add(&_worker_io, _fd, _type, _data, _prio, _t, IO_WATCH_READ, 0)
8588

8689
#define reactor_add_writer( _fd, _type, _prio, _data) \
87-
io_watch_add(&_worker_io, _fd, _type, _data, _prio, 0, IO_WATCH_WRITE)
90+
io_watch_add(&_worker_io, _fd, _type, _data, _prio, 0, IO_WATCH_WRITE, 0)
8891

8992
#define reactor_del_reader( _fd, _idx, _io_flags) \
9093
io_watch_del(&_worker_io, _fd, _idx, _io_flags, IO_WATCH_READ)

0 commit comments

Comments
 (0)