Skip to content

Commit 0f1fed1

Browse files
authored
Merge pull request ceph#64110 from yuvalif/wip-yuval-71402
rgw/notifications: add http request timeout and max inflight
2 parents cb903ca + 8a2e7bf commit 0f1fed1

File tree

3 files changed

+59
-2
lines changed

3 files changed

+59
-2
lines changed

doc/radosgw/notifications.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,17 @@ Notification Performance Statistics
141141
``pubsub_push_ok`` and ``pubsub_push_fail`` are incremented per push action
142142
on each notification.
143143

144+
Configuration Options
145+
------------------------------
146+
The following are global configuration options for the different endpoints:
147+
148+
HTTP
149+
~~~~
150+
.. confval:: rgw_http_notif_message_timeout
151+
.. confval:: rgw_http_notif_connection_timeout
152+
.. confval:: rgw_http_notif_max_inflight
153+
154+
144155
Bucket Notification REST API
145156
----------------------------
146157

src/common/options/rgw.yaml.in

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4307,6 +4307,42 @@ options:
43074307
services:
43084308
- rgw
43094309
with_legacy: true
4310+
- name: rgw_http_notif_message_timeout
4311+
type: uint
4312+
level: advanced
4313+
desc: This is the maximum time in seconds to deliver a notification
4314+
long_desc: This is the maximum time in seconds to deliver a notification.
4315+
Delivery error occurs when the message timeout is exceeded.
4316+
This value includes the connection time, and hence must be larger than rgw_http_notif_connection_timeout.
4317+
If set to zero the http client will wait indefinitely.
4318+
see https://curl.se/libcurl/c/CURLOPT_TIMEOUT.html
4319+
default: 10
4320+
services:
4321+
- rgw
4322+
with_legacy: true
4323+
- name: rgw_http_notif_connection_timeout
4324+
type: uint
4325+
level: advanced
4326+
desc: This is the maximum time in seconds to connect to an endpoint
4327+
long_desc: This is the maximum time in seconds to connect to an endpoint.
4328+
Delivery error occurs when the connection timeout is exceeded.
4329+
If set to zero, libcurl's built-in default connection timeout of 300 seconds will be used.
4330+
see https://curl.se/libcurl/c/CURLOPT_CONNECTTIMEOUT.html
4331+
default: 5
4332+
services:
4333+
- rgw
4334+
with_legacy: true
4335+
- name: rgw_http_notif_max_inflight
4336+
type: uint
4337+
level: advanced
4338+
desc: This is the maximum number of messages in-flight (across all http endpoints)
4339+
long_desc: This is the maximum number of messages in-flight (across all http endpoints).
4340+
Delivery error (EBUSY) occurs when the number of in-flight messages reaches this limit.
4341+
If set to zero there is no limit on the number of messages in-flight.
4342+
default: 8192
4343+
services:
4344+
- rgw
4345+
with_legacy: true
43104346
- name: rgw_d4n_l1_datacache_address
43114347
type: str
43124348
level: advanced

src/rgw/driver/rados/rgw_pubsub_push.cc

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ bool get_bool(const RGWHTTPArgs& args, const std::string& name, bool default_val
6161

6262
static std::unique_ptr<RGWHTTPManager> s_http_manager;
6363
static std::shared_mutex s_http_manager_mutex;
64+
static std::atomic<unsigned> s_http_manager_inflight(0);
6465

6566
class RGWPubSubHTTPEndpoint : public RGWPubSubEndpoint {
6667
private:
@@ -99,10 +100,17 @@ class RGWPubSubHTTPEndpoint : public RGWPubSubEndpoint {
99100
ldout(cct, 1) << "ERROR: send failed. http endpoint manager not running" << dendl;
100101
return -ESRCH;
101102
}
103+
const auto max_inflight = cct->_conf->rgw_http_notif_max_inflight;
104+
if (max_inflight != 0 &&
105+
s_http_manager_inflight >= max_inflight) {
106+
ldout(cct, 1) << "ERROR: send failed. http endpoint manager busy. in-flight requests: " <<
107+
s_http_manager_inflight << " >= " << max_inflight << dendl;
108+
return -EBUSY;
109+
}
102110
bufferlist read_bl;
103111
RGWPostHTTPData request(cct, "POST", endpoint, &read_bl, verify_ssl);
104-
//default to 3 seconds for wrong url hits - if wrong endpoint configured
105-
request.set_req_connect_timeout(3);
112+
request.set_req_connect_timeout(cct->_conf->rgw_http_notif_connection_timeout);
113+
request.set_req_timeout(cct->_conf->rgw_http_notif_message_timeout);
106114
const auto post_data = json_format_pubsub_event(event);
107115
if (cloudevents) {
108116
// following: https://github.com/cloudevents/spec/blob/v1.0.1/http-protocol-binding.md
@@ -118,11 +126,13 @@ class RGWPubSubHTTPEndpoint : public RGWPubSubEndpoint {
118126
request.set_post_data(post_data);
119127
request.set_send_length(post_data.length());
120128
request.append_header("Content-Type", "application/json");
129+
++s_http_manager_inflight;
121130
if (perfcounter) perfcounter->inc(l_rgw_pubsub_push_pending);
122131
auto rc = s_http_manager->add_request(&request);
123132
if (rc == 0) {
124133
rc = request.wait(dpp, y);
125134
}
135+
--s_http_manager_inflight;
126136
if (perfcounter) perfcounter->dec(l_rgw_pubsub_push_pending);
127137
// TODO: use read_bl to process return code and handle according to ack level
128138
return rc;

0 commit comments

Comments
 (0)