Skip to content

Commit b8194f1

Browse files
committed
Enables the use of EPOLLEXCLUSIVE for UDP workers where supported.
Normal behaviour for multiple processes using EPOLL to listen on a single FD is for every process to be woken on every IO event. This can cause a thundering herd effect, increasing context switches and CPU usage. With EPOLLEXCLUSIVE, only a single UDP worker will be woken to handle an IO request, greatly reducing context switching and contention, especially as the number of processes grows. One potential downside to using EPOLLEXCLUSIVE is that EPOLL may coalesce multiple events on a file descriptor into a single wakeup. This has the potential to increase latency if only a single process is woken to handle potentially multiple SIP messages. To help balance latency against reduced thundering, this patch causes the first worker for a socket to not use EPOLLEXCLUSIVE and thus ALWAYS get woken for events. If present, at least one other worker using EPOLLEXCLUSIVE will also be woken.
1 parent d089b9c commit b8194f1

File tree

3 files changed

+27
-19
lines changed

3 files changed

+27
-19
lines changed

io_wait.h

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,8 @@ inline static int io_watch_add( io_wait_h* h, // lgtm [cpp/use-of-goto]
347347
void* data,
348348
int prio,
349349
unsigned int timeout,
350-
int flags)
350+
int flags,
351+
int exclusive)
351352
{
352353

353354
/* helper macros */
@@ -506,16 +507,9 @@ inline static int io_watch_add( io_wait_h* h, // lgtm [cpp/use-of-goto]
506507
ep_event.events|=EPOLLOUT;
507508
if (!already) {
508509
again1:
509-
#if 0
510-
/* This is currently broken, because when using EPOLLEXCLUSIVE, the OS will
511-
* send sequential events to the same process - thus our pseudo-dispatcher
512-
* will no longer work, since events on a pipe will be queued by a single
513-
* process. - razvanc
514-
*/
515-
#if (defined __OS_linux) && (__GLIBC__ >= 2) && (__GLIBC_MINOR__ >= 24)
516-
if (e->flags & IO_WATCH_READ)
510+
#ifdef EPOLLEXCLUSIVE
511+
if (e->flags & IO_WATCH_READ && exclusive == 1)
517512
ep_event.events|=EPOLLEXCLUSIVE;
518-
#endif
519513
#endif
520514
n=epoll_ctl(h->epfd, EPOLL_CTL_ADD, fd, &ep_event);
521515
if (n==-1){

net/net_udp.c

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ inline static int handle_io(struct fd_map* fm, int idx,int event_type)
331331
}
332332

333333

334-
int udp_proc_reactor_init( struct socket_info *si )
334+
int udp_proc_reactor_init( struct socket_info *si, int si_rank )
335335
{
336336

337337
/* create the reactor for UDP proc */
@@ -359,9 +359,18 @@ int udp_proc_reactor_init( struct socket_info *si )
359359
}
360360

361361
/* init: start watching the SIP UDP fd */
362-
if (reactor_add_reader( si->socket, F_UDP_READ, RCT_PRIO_NET, si)<0) {
363-
LM_CRIT("failed to add UDP listen socket to reactor\n");
364-
goto error;
362+
//First child per socket becomes 'Master', will wake on every event
363+
if (si_rank == 0) {
364+
if (reactor_add_reader( si->socket, F_UDP_READ, RCT_PRIO_NET, si)<0) {
365+
LM_CRIT("failed to add UDP listen socket to reactor\n");
366+
goto error;
367+
}
368+
} else {
369+
//Subsequent processes are helpers, only one should be woken to help at a time
370+
if (reactor_add_reader_exclusive( si->socket, F_UDP_READ, RCT_PRIO_NET, si)<0) {
371+
LM_CRIT("failed to add UDP listen socket to reactor\n");
372+
goto error;
373+
}
365374
}
366375

367376
return 0;
@@ -389,7 +398,9 @@ static int fork_dynamic_udp_process(void *si_filter)
389398
bind_address=si; /* shortcut */
390399
/* we first need to init the reactor to be able to add fd
391400
* into it in child_init routines */
392-
if (udp_proc_reactor_init(si) < 0 ||
401+
/* Since this is in addition to the master process, si_rank should be > 0 to enable
402+
* exclusive polling with EPOLL */
403+
if (udp_proc_reactor_init(si, 1) < 0 ||
393404
init_child(10000/*FIXME*/) < 0) {
394405
goto error;
395406
}
@@ -486,7 +497,7 @@ int udp_start_processes(int *chd_rank, int *startup_done)
486497
bind_address=si; /* shortcut */
487498
/* we first need to init the reactor to be able to add fd
488499
* into it in child_init routines */
489-
if (udp_proc_reactor_init(si) < 0 ||
500+
if (udp_proc_reactor_init(si, i) < 0 ||
490501
init_child(*chd_rank) < 0) {
491502
report_failure_status();
492503
if (*chd_rank == 1 && startup_done)

reactor_defs.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,13 +78,16 @@ int init_reactor_size(void);
7878
init_io_wait(&_worker_io, _name, reactor_size, io_poll_method, _prio_max)
7979

8080
#define reactor_add_reader( _fd, _type, _prio, _data) \
81-
io_watch_add(&_worker_io, _fd, _type, _data, _prio, 0, IO_WATCH_READ)
81+
io_watch_add(&_worker_io, _fd, _type, _data, _prio, 0, IO_WATCH_READ, 0)
82+
83+
#define reactor_add_reader_exclusive( _fd, _type, _prio, _data) \
84+
io_watch_add(&_worker_io, _fd, _type, _data, _prio, 0, IO_WATCH_READ, 1)
8285

8386
#define reactor_add_reader_with_timeout( _fd, _type, _prio, _t, _data) \
84-
io_watch_add(&_worker_io, _fd, _type, _data, _prio, _t, IO_WATCH_READ)
87+
io_watch_add(&_worker_io, _fd, _type, _data, _prio, _t, IO_WATCH_READ, 0)
8588

8689
#define reactor_add_writer( _fd, _type, _prio, _data) \
87-
io_watch_add(&_worker_io, _fd, _type, _data, _prio, 0, IO_WATCH_WRITE)
90+
io_watch_add(&_worker_io, _fd, _type, _data, _prio, 0, IO_WATCH_WRITE, 0)
8891

8992
#define reactor_del_reader( _fd, _idx, _io_flags) \
9093
io_watch_del(&_worker_io, _fd, _idx, _io_flags, IO_WATCH_READ)

0 commit comments

Comments
 (0)