Skip to content

Commit f3d26ba

Browse files
committed
DRIVERS-2884: CSOT avoid connection churn when operations timeout
1 parent ace53b1 commit f3d26ba

File tree

10 files changed

+3513
-9
lines changed

10 files changed

+3513
-9
lines changed

source/client-side-operations-timeout/tests/pending-response.json

Lines changed: 661 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 349 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,349 @@
1+
description: "Connection churn is prevented by reading pending responses during connection checkout"
2+
schemaVersion: "1.27"
3+
runOnRequirements:
4+
- minServerVersion: "4.4"
5+
# TODO(SERVER-96344): When using failpoints, mongos returns MaxTimeMSExpired
6+
# after maxTimeMS, whereas mongod returns it after
7+
# max(blockTimeMS, maxTimeMS). Until this ticket is resolved, these tests
8+
# will not pass on sharded clusters, so "sharded" is omitted from topologies.
9+
topologies: ["single", "replicaset"]
10+
createEntities:
11+
- client:
12+
id: &failPointClient failPointClient # Only used to configure fail points.
13+
useMultipleMongoses: false
14+
- client:
15+
id: &client client # Client under test; its events are asserted below.
16+
uriOptions:
17+
maxPoolSize: 1 # NOTE(review): presumably forces every operation onto the same pooled connection - confirm.
18+
minPoolSize: 1
19+
awaitMinPoolSizeMS: 10000
20+
useMultipleMongoses: false
21+
observeEvents:
22+
- commandFailedEvent
23+
- commandSucceededEvent
24+
- connectionCheckedOutEvent
25+
- connectionCheckedInEvent
26+
- connectionClosedEvent
27+
- connectionPendingResponseSucceeded
28+
- connectionPendingResponseStarted
29+
- connectionPendingResponseFailed
30+
- database:
31+
id: &database test
32+
client: *client
33+
databaseName: *database
34+
- collection:
35+
id: &collection coll
36+
database: *database
37+
collectionName: *collection
38+
initialData:
39+
- collectionName: *collection
40+
databaseName: *database
41+
documents: [] # Every test starts from an empty collection.
42+
tests:
43+
# Attempting a pending response read on a non-timeout operation (one with no
44+
# timeoutMS) that can read the response from the TCP buffer should complete
45+
# the pending read and the connection should be checked out.
46+
- description: "non-timeout op with response and no error"
47+
operations:
48+
# Create a failpoint to block the first operation (the insert).
49+
- name: failPoint
50+
object: testRunner
51+
arguments:
52+
client: *failPointClient
53+
failPoint:
54+
configureFailPoint: failCommand
55+
mode: {times: 1}
56+
data:
57+
failCommands: ["insert"]
58+
blockConnection: true
59+
blockTimeMS: 100
60+
# Execute an insert whose timeoutMS (75) is less than the block time (100).
61+
- name: insertOne
62+
object: *collection
63+
arguments:
64+
timeoutMS: 75
65+
document: {_id: 3, x: 1}
66+
expectError:
67+
isTimeoutError: true
68+
# Execute a subsequent operation (no timeoutMS) to complete the pending read.
69+
- name: find
70+
object: *collection
71+
arguments:
72+
filter: {_id: 1}
73+
expectEvents:
74+
- client: *client
75+
events:
76+
- commandFailedEvent:
77+
commandName: insert
78+
- commandSucceededEvent:
79+
commandName: find
80+
- client: *client
81+
eventType: cmap
82+
events:
83+
- connectionCheckedOutEvent: {}
84+
- connectionCheckedInEvent: {} # Insert fails.
85+
- connectionPendingResponseStarted: {}
86+
- connectionPendingResponseSucceeded: {} # Find operation drains connection.
87+
- connectionCheckedOutEvent: {}
88+
- connectionCheckedInEvent: {} # Find succeeds.
89+
# Attempting a pending response read on a non-timeout operation that gets no
90+
# response from the server after 3s should close the connection.
91+
- description: "non-timeout op with no response"
92+
operations:
93+
# Create a failpoint to block the first operation (the insert).
94+
- name: failPoint
95+
object: testRunner
96+
arguments:
97+
client: *failPointClient
98+
failPoint:
99+
configureFailPoint: failCommand
100+
mode: {times: 1}
101+
data:
102+
failCommands: ["insert"]
103+
blockConnection: true
104+
blockTimeMS: 3100
105+
# Execute an insert whose timeoutMS (50) is less than the block time (3100).
106+
- name: insertOne
107+
object: *collection
108+
arguments:
109+
timeoutMS: 50
110+
document: {_id: 3, x: 1}
111+
expectError:
112+
isTimeoutError: true
113+
# Execute a subsequent operation to complete the read. We expect failure
114+
# in the pending read, resulting in a closed connection. However, the
115+
# find should retry and succeed.
116+
- name: find
117+
object: *collection
118+
arguments:
119+
filter: {_id: 1}
120+
expectEvents:
121+
- client: *client
122+
events:
123+
- commandFailedEvent:
124+
commandName: insert
125+
- commandSucceededEvent:
126+
commandName: find
127+
- client: *client
128+
eventType: cmap
129+
events:
130+
- connectionCheckedOutEvent: {}
131+
- connectionCheckedInEvent: {} # Insert fails.
132+
- connectionPendingResponseStarted: {}
133+
- connectionPendingResponseFailed:
134+
reason: timeout
135+
- connectionClosedEvent:
136+
reason: error
137+
- connectionCheckedOutEvent: {} # Find's retry starts and succeeds.
138+
- connectionCheckedInEvent: {}
139+
# Attempting a pending response read on an operation with its own timeoutMS,
140+
# where the response can be read before that timeout expires, should complete
141+
# the pending read and the connection should be checked out.
142+
- description: "timeout op with response and no error"
143+
operations:
144+
# Create a failpoint to block the first operation (the insert).
145+
- name: failPoint
146+
object: testRunner
147+
arguments:
148+
client: *failPointClient
149+
failPoint:
150+
configureFailPoint: failCommand
151+
mode: {times: 1}
152+
data:
153+
failCommands: ["insert"]
154+
blockConnection: true
155+
blockTimeMS: 250
156+
# Execute an insert whose timeoutMS (75) is less than the block time (250).
157+
- name: insertOne
158+
object: *collection
159+
arguments:
160+
timeoutMS: 75
161+
document: {_id: 3, x: 1}
162+
expectError:
163+
isTimeoutError: true
164+
# Execute a subsequent operation, with its own timeoutMS, to complete the read.
165+
- name: find
166+
object: *collection
167+
arguments:
168+
timeoutMS: 200
169+
filter: {_id: 1}
170+
expectEvents:
171+
- client: *client
172+
events:
173+
- commandFailedEvent:
174+
commandName: insert
175+
- commandSucceededEvent:
176+
commandName: find
177+
- client: *client
178+
eventType: cmap
179+
events:
180+
- connectionCheckedOutEvent: {}
181+
- connectionCheckedInEvent: {} # Insert fails.
182+
- connectionPendingResponseStarted: {}
183+
- connectionPendingResponseSucceeded: {}
184+
- connectionCheckedOutEvent: {}
185+
- connectionCheckedInEvent: {} # Find succeeds.
186+
# It may take multiple calls to the pending response handler to drain the
187+
# inbound buffer.
188+
- description: "multiple calls to drain buffer"
189+
operations:
190+
# Create a failpoint to block the first operation (the insert). The
191+
# failpoint fires only once (times: 1) and only for "insert".
192+
- name: failPoint
193+
object: testRunner
194+
arguments:
195+
client: *failPointClient
196+
failPoint:
197+
configureFailPoint: failCommand
198+
mode: {times: 1}
199+
data:
200+
failCommands: ["insert"]
201+
blockConnection: true
202+
blockTimeMS: 500
203+
# Execute an insert whose timeoutMS (50) is less than the block time (500).
204+
- name: insertOne
205+
object: *collection
206+
arguments:
207+
timeoutMS: 50
208+
document: {_id: 3, x: 1}
209+
expectError:
210+
isTimeoutError: true
211+
# Execute a subsequent operation with a timeout less than the block time.
212+
# Its pending read is expected to time out as well.
213+
- name: find
214+
object: *collection
215+
arguments:
216+
timeoutMS: 50
217+
filter: {_id: 1}
218+
expectError:
219+
isTimeoutError: true
220+
# Execute a final operation (no timeoutMS) to drain the buffer.
221+
- name: find
222+
object: *collection
223+
arguments:
224+
filter: {_id: 1}
225+
expectEvents:
226+
- client: *client
227+
events:
228+
- commandFailedEvent:
229+
commandName: insert
230+
- commandSucceededEvent:
231+
commandName: find
232+
- client: *client
233+
eventType: cmap
234+
events:
235+
- connectionCheckedOutEvent: {}
236+
- connectionCheckedInEvent: {} # Insert fails.
237+
- connectionPendingResponseStarted: {} # First find fails.
238+
- connectionPendingResponseFailed:
239+
reason: timeout
240+
- connectionPendingResponseStarted: {} # Second find drains the buffer.
241+
- connectionPendingResponseSucceeded: {}
242+
- connectionCheckedOutEvent: {}
243+
- connectionCheckedInEvent: {} # Second find succeeds.
242+
# If the connection is closed server-side while draining the response, the
245+
# driver must retry with a different connection.
246+
- description: "read op retries when connection closes server-side while draining response"
247+
operations:
248+
# Create a failpoint that blocks the first operation (the insert) and
249+
# closes its connection.
250+
- name: failPoint
251+
object: testRunner
252+
arguments:
253+
client: *failPointClient
254+
failPoint:
255+
configureFailPoint: failCommand
256+
mode: {times: 1}
257+
data:
258+
failCommands: ["insert"]
259+
blockConnection: true
260+
blockTimeMS: 500
261+
closeConnection: true
262+
# Execute an insert whose timeoutMS (50) is less than the block time (500).
263+
- name: insertOne
264+
object: *collection
265+
arguments:
266+
timeoutMS: 50
267+
document: {_id: 3, x: 1}
268+
expectError:
269+
isTimeoutError: true
270+
- name: wait
271+
object: testRunner
272+
arguments:
273+
ms: 500 # NOTE(review): presumably lets the block time elapse so the connection is already closed - confirm.
274+
# Execute a subsequent find. Its pending read fails with an error, the
275+
# connection is closed, and the find succeeds on a new checkout.
276+
- name: find
277+
object: *collection
278+
arguments:
279+
timeoutMS: 50
280+
filter: {_id: 1}
281+
expectEvents:
282+
- client: *client
283+
events:
284+
- commandFailedEvent:
285+
commandName: insert
286+
- commandSucceededEvent:
287+
commandName: find
288+
- client: *client
289+
eventType: cmap
290+
events:
291+
- connectionCheckedOutEvent: {}
292+
- connectionCheckedInEvent: {} # Insert fails.
293+
- connectionPendingResponseStarted: {} # Pending read fails for the find.
294+
- connectionPendingResponseFailed:
295+
reason: error
296+
- connectionClosedEvent:
297+
reason: error
298+
- connectionCheckedOutEvent: {}
299+
- connectionCheckedInEvent: {} # Find finishes.
296+
# If the connection is closed server-side while draining the response, the
297+
# driver must retry with a different connection.
298+
- description: "write op retries when connection closes server-side while draining response"
299+
operations:
300+
# Create a failpoint that blocks the first operation (the first insert)
301+
# and closes its connection. It fires only once (times: 1).
302+
- name: failPoint
303+
object: testRunner
304+
arguments:
305+
client: *failPointClient
306+
failPoint:
307+
configureFailPoint: failCommand
308+
mode: {times: 1}
309+
data:
310+
failCommands: ["insert"]
311+
blockConnection: true
312+
blockTimeMS: 500
313+
closeConnection: true
314+
# Execute an insert whose timeoutMS (50) is less than the block time (500).
315+
- name: insertOne
316+
object: *collection
317+
arguments:
318+
timeoutMS: 50
319+
document: {_id: 3, x: 1}
320+
expectError:
321+
isTimeoutError: true
322+
- name: wait
323+
object: testRunner
324+
arguments:
325+
ms: 500 # NOTE(review): presumably lets the block time elapse so the connection is already closed - confirm.
326+
# Execute a second insert. Its pending read fails with an error, the
327+
# connection is closed, and the insert succeeds on a new checkout.
328+
- name: insertOne
329+
object: *collection
330+
arguments:
331+
timeoutMS: 50
332+
document: {_id: 3, x: 1}
333+
expectEvents:
334+
- client: *client
335+
events:
336+
- commandFailedEvent:
337+
commandName: insert
338+
- commandSucceededEvent:
339+
commandName: insert
340+
- client: *client
341+
eventType: cmap
342+
events:
343+
- connectionCheckedOutEvent: {}
344+
- connectionCheckedInEvent: {} # First insert fails.
345+
- connectionPendingResponseStarted: {} # Pending read fails for the second insert.
346+
- connectionPendingResponseFailed:
347+
reason: error
348+
- connectionClosedEvent:
349+
reason: error
350+
- connectionCheckedOutEvent: {}
351+
- connectionCheckedInEvent: {} # Second insert finishes.

0 commit comments

Comments
 (0)