Skip to content

Commit a709078

Browse files
rgw/beast: optimize for accept when meeting error in listening
It is not appropriate to stop accepting sockets whenever an error occurs in a previous listen or accept call, because this causes radosgw to stop working after an occasional failure. For example, a "Too many open files" warning may occur at high IOPS (or just after a reshard, when the number of open sockets may increase because operations are blocked). Signed-off-by: Mingyuan Liang <[email protected]>
1 parent 100d1b6 commit a709078

File tree

1 file changed

+51
-1
lines changed

1 file changed

+51
-1
lines changed

src/rgw/rgw_asio_frontend.cc

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,44 @@ auto make_stack_allocator() {
6868
return boost::context::protected_fixedsize_stack{512*1024};
6969
}
7070

71+
// Upper bound (5000 ms) on the delay used by RGWAsioBackoff: update_wait_time()
// stops doubling the wait once it reaches this value and clamps to it.
static constexpr std::chrono::milliseconds BACKOFF_MAX_WAIT(5000);
72+
73+
class RGWAsioBackoff {
74+
using Clock = ceph::coarse_mono_clock;
75+
using Timer = boost::asio::basic_waitable_timer<Clock>;
76+
Timer timer;
77+
78+
ceph::timespan cur_wait;
79+
void update_wait_time();
80+
public:
81+
explicit RGWAsioBackoff(boost::asio::io_context& context) :
82+
timer(context),
83+
cur_wait(std::chrono::milliseconds(1)) {
84+
}
85+
86+
void backoff_sleep(boost::asio::yield_context yield);
87+
void reset() {
88+
cur_wait = std::chrono::milliseconds(1);
89+
}
90+
};
91+
92+
void RGWAsioBackoff::update_wait_time()
93+
{
94+
if (cur_wait < BACKOFF_MAX_WAIT) {
95+
cur_wait = cur_wait * 2;
96+
}
97+
if (cur_wait > BACKOFF_MAX_WAIT) {
98+
cur_wait = BACKOFF_MAX_WAIT;
99+
}
100+
}
101+
102+
// Grow the backoff delay by one step and suspend the calling coroutine until
// that delay has elapsed.
//
// @param yield  coroutine context used to suspend while the timer runs.
//
// The wait result is captured in an error_code instead of being thrown.
// Arming the timer with expires_after() cancels any wait already pending on
// it, and a cancelled wait completes with operation_aborted; with a plain
// `yield` token that would surface as a boost::system::system_error thrown
// into the caller. A shortened sleep is harmless here: the caller just
// retries its operation earlier than planned.
// NOTE(review): this matters if more than one coroutine can share one
// RGWAsioBackoff instance (e.g. one accept loop per listener) - confirm.
void RGWAsioBackoff::backoff_sleep(boost::asio::yield_context yield)
{
  update_wait_time();
  timer.expires_after(cur_wait);

  boost::system::error_code ec;
  timer.async_wait(yield[ec]);
  // ec is intentionally ignored: both normal expiry and cancellation simply
  // return control to the caller.
}
108+
71109
using namespace std;
72110

73111
template <typename Stream>
@@ -441,6 +479,7 @@ class AsioFrontend {
441479

442480
std::atomic<bool> going_down{false};
443481

482+
RGWAsioBackoff backoff;
444483
CephContext* ctx() const { return cct.get(); }
445484
std::optional<dmc::ClientCounters> client_counters;
446485
std::unique_ptr<dmc::ClientConfig> client_config;
@@ -453,7 +492,8 @@ class AsioFrontend {
453492
dmc::SchedulerCtx& sched_ctx,
454493
boost::asio::io_context& context)
455494
: env(env), conf(conf), context(context),
456-
pause_mutex(context.get_executor())
495+
pause_mutex(context.get_executor()),
496+
backoff(context)
457497
{
458498
auto sched_t = dmc::get_scheduler_t(ctx());
459499
switch(sched_t){
@@ -1025,9 +1065,19 @@ void AsioFrontend::accept(Listener& l, boost::asio::yield_context yield)
10251065
return;
10261066
} else if (ec) {
10271067
ldout(ctx(), 1) << "accept failed: " << ec.message() << dendl;
1068+
if (ec == boost::system::errc::too_many_files_open ||
1069+
ec == boost::system::errc::too_many_files_open_in_system ||
1070+
ec == boost::system::errc::no_buffer_space ||
1071+
ec == boost::system::errc::not_enough_memory) {
1072+
// always retry accept() if we hit a resource limit
1073+
backoff.backoff_sleep(yield);
1074+
continue;
1075+
}
1076+
ldout(ctx(), 0) << "accept stopped due to error: " << ec.message() << dendl;
10281077
return;
10291078
}
10301079

1080+
backoff.reset();
10311081
on_accept(l, std::move(l.socket));
10321082
}
10331083
}

0 commit comments

Comments
 (0)