Skip to content

Commit 49dbe97

Browse files
committed
DRIVERS-2884: CSOT avoid connection churn when operations timeout
1 parent 4244306 commit 49dbe97

File tree

11 files changed

+3448
-10
lines changed

11 files changed

+3448
-10
lines changed

source/client-side-operations-timeout/tests/pending-response.json

Lines changed: 661 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 351 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,351 @@
1+
description: "Connection churn is prevented by reading pending responses during connection checkout"
2+
schemaVersion: "1.27"
3+
runOnRequirements:
4+
- minServerVersion: "4.4"
5+
# TODO(SERVER-96344): When using failpoints, mongos returns MaxTimeMSExpired
6+
# after maxTimeMS, whereas mongod returns it after
7+
# max(blockTimeMS, maxTimeMS). Until this ticket is resolved, these tests
8+
# will not pass on sharded clusters.
9+
topologies: ["single", "replicaset"] # Sharded topologies are excluded; see the TODO above.
10+
createEntities:
11+
- client:
12+
id: &failPointClient failPointClient # Used only to configure failpoints in the tests below.
13+
useMultipleMongoses: false
14+
- client:
15+
id: &client client # Client under test; its command and CMAP events are asserted.
16+
uriOptions:
17+
maxPoolSize: 1 # Together with minPoolSize: 1, caps the pool at a single connection.
18+
minPoolSize: 1
19+
awaitMinPoolSizeMS: 10000 # NOTE(review): presumably the runner waits up to 10 s for minPoolSize - confirm.
20+
useMultipleMongoses: false
21+
observeEvents:
22+
- commandFailedEvent
23+
- commandSucceededEvent
24+
- connectionCheckedOutEvent
25+
- connectionCheckedInEvent
26+
- connectionClosedEvent
27+
- connectionPendingResponseSucceeded
28+
- connectionPendingResponseStarted
29+
- connectionPendingResponseFailed
30+
- database:
31+
id: &database test
32+
client: *client
33+
databaseName: *database
34+
- collection:
35+
id: &collection coll
36+
database: *database
37+
collectionName: *collection
38+
initialData:
39+
- collectionName: *collection
40+
databaseName: *database
41+
documents: [] # The collection starts empty for every test.
42+
tests:
43+
# A pending response read triggered by an operation with no timeoutMS that can
44+
# immediately read from the TCP buffer should complete the pending read, and
45+
# the connection should then be checked out.
46+
- description: "non-timeout op with response and no error"
47+
operations:
48+
# Create a failpoint that blocks the first insert for 100 ms.
49+
- name: failPoint
50+
object: testRunner
51+
arguments:
52+
client: *failPointClient
53+
failPoint:
54+
configureFailPoint: failCommand
55+
mode: {times: 1}
56+
data:
57+
failCommands: ["insert"]
58+
blockConnection: true
59+
blockTimeMS: 100
60+
# Execute an insert whose 75 ms timeout is shorter than the 100 ms block time.
61+
- name: insertOne
62+
object: *collection
63+
arguments:
64+
timeoutMS: 75
65+
document: {_id: 3, x: 1}
66+
expectError:
67+
isTimeoutError: true
68+
# Execute a subsequent find (no timeoutMS) to complete the pending read.
69+
- name: find
70+
object: *collection
71+
arguments:
72+
filter: {_id: 1}
73+
expectEvents:
74+
- client: *client
75+
events:
76+
- commandFailedEvent:
77+
commandName: insert
78+
- commandSucceededEvent:
79+
commandName: find
80+
- client: *client
81+
eventType: cmap
82+
events:
83+
- connectionCheckedOutEvent: {}
84+
- connectionCheckedInEvent: {} # Insert fails.
85+
- connectionPendingResponseStarted: {}
86+
- connectionPendingResponseSucceeded: {} # Find operation drains connection.
87+
- connectionCheckedOutEvent: {}
88+
- connectionCheckedInEvent: {} # Find succeeds.
89+
# A pending response read triggered by an operation with no timeoutMS that gets
90+
# no response from the server after 3s should close the connection.
91+
- description: "non-timeout op with no response"
92+
operations:
93+
# Create a failpoint that blocks the first insert for 3100 ms.
94+
- name: failPoint
95+
object: testRunner
96+
arguments:
97+
client: *failPointClient
98+
failPoint:
99+
configureFailPoint: failCommand
100+
mode: {times: 1}
101+
data:
102+
failCommands: ["insert"]
103+
blockConnection: true
104+
blockTimeMS: 3100
105+
# Execute an insert whose 50 ms timeout is shorter than the 3100 ms block time.
106+
- name: insertOne
107+
object: *collection
108+
arguments:
109+
timeoutMS: 50
110+
document: {_id: 3, x: 1}
111+
expectError:
112+
isTimeoutError: true
113+
# Execute a subsequent operation to complete the read. We expect failure
114+
# in the pending read, resulting in a closed connection. However, the
115+
# find should retry and succeed.
116+
- name: find
117+
object: *collection
118+
arguments:
119+
filter: {_id: 1}
120+
expectEvents:
121+
- client: *client
122+
events:
123+
- commandFailedEvent:
124+
commandName: insert
125+
# No second commandFailedEvent is expected because the timeout happened
126+
# while checking out the connection for the second operation.
127+
- commandSucceededEvent:
128+
commandName: find
129+
- client: *client
130+
eventType: cmap
131+
events:
132+
- connectionCheckedOutEvent: {}
133+
- connectionCheckedInEvent: {} # Insert fails.
134+
- connectionPendingResponseStarted: {}
135+
- connectionPendingResponseFailed:
136+
reason: timeout
137+
- connectionClosedEvent:
138+
reason: error
139+
- connectionCheckedOutEvent: {} # Find's retry starts and succeeds.
140+
- connectionCheckedInEvent: {}
141+
# A pending response read triggered by an operation that sets its own timeoutMS
142+
# and can read the response from the TCP buffer should complete the pending
143+
# read, and the connection should then be checked out.
144+
- description: "timeout op with response and no error"
145+
operations:
146+
# Create a failpoint that blocks the first insert for 250 ms.
147+
- name: failPoint
148+
object: testRunner
149+
arguments:
150+
client: *failPointClient
151+
failPoint:
152+
configureFailPoint: failCommand
153+
mode: {times: 1}
154+
data:
155+
failCommands: ["insert"]
156+
blockConnection: true
157+
blockTimeMS: 250
158+
# Execute an insert whose 75 ms timeout is shorter than the 250 ms block time.
159+
- name: insertOne
160+
object: *collection
161+
arguments:
162+
timeoutMS: 75
163+
document: {_id: 3, x: 1}
164+
expectError:
165+
isTimeoutError: true
166+
# Execute a subsequent find with a 200 ms timeout to complete the pending read.
167+
- name: find
168+
object: *collection
169+
arguments:
170+
timeoutMS: 200
171+
filter: {_id: 1}
172+
expectEvents:
173+
- client: *client
174+
events:
175+
- commandFailedEvent:
176+
commandName: insert
177+
- commandSucceededEvent:
178+
commandName: find
179+
- client: *client
180+
eventType: cmap
181+
events:
182+
- connectionCheckedOutEvent: {}
183+
- connectionCheckedInEvent: {} # Insert fails.
184+
- connectionPendingResponseStarted: {}
185+
- connectionPendingResponseSucceeded: {} # Find operation drains connection.
186+
- connectionCheckedOutEvent: {}
187+
- connectionCheckedInEvent: {} # Find succeeds.
188+
# It may take multiple calls to the pending response handler to drain the
189+
# inbound buffer.
190+
- description: "multiple calls to drain buffer"
191+
operations:
192+
# Create a failpoint that blocks the insert for 500 ms; it fires only once.
193+
- name: failPoint
194+
object: testRunner
195+
arguments:
196+
client: *failPointClient
197+
failPoint:
198+
configureFailPoint: failCommand
199+
mode: {times: 1}
200+
data:
201+
failCommands: ["insert"]
202+
blockConnection: true
203+
blockTimeMS: 500
204+
# Execute an insert whose 50 ms timeout is shorter than the 500 ms block time.
205+
- name: insertOne
206+
object: *collection
207+
arguments:
208+
timeoutMS: 50
209+
document: {_id: 3, x: 1}
210+
expectError:
211+
isTimeoutError: true
212+
# Execute a find whose 50 ms timeout is also shorter than the block time.
213+
- name: find
214+
object: *collection
215+
arguments:
216+
timeoutMS: 50
217+
filter: {_id: 1}
218+
expectError:
219+
isTimeoutError: true
220+
# Execute a final find (no timeoutMS) to drain the buffer.
221+
- name: find
222+
object: *collection
223+
arguments:
224+
filter: {_id: 1}
225+
expectEvents:
226+
- client: *client
227+
events:
228+
- commandFailedEvent:
229+
commandName: insert
230+
- commandSucceededEvent:
231+
commandName: find
232+
- client: *client
233+
eventType: cmap
234+
events:
235+
- connectionCheckedOutEvent: {}
236+
- connectionCheckedInEvent: {} # Insert fails.
237+
- connectionPendingResponseStarted: {} # First find fails.
238+
- connectionPendingResponseFailed:
239+
reason: timeout
240+
- connectionPendingResponseStarted: {} # Second find drains the buffer.
241+
- connectionPendingResponseSucceeded: {}
242+
- connectionCheckedOutEvent: {}
243+
- connectionCheckedInEvent: {} # Second find succeeds.
244+
# If the connection is closed server-side while draining the response, the
245+
# driver must retry with a different connection.
246+
- description: "read op retries when connection closes server-side while draining response"
247+
operations:
248+
# Create a failpoint that blocks the insert and closes its connection.
249+
- name: failPoint
250+
object: testRunner
251+
arguments:
252+
client: *failPointClient
253+
failPoint:
254+
configureFailPoint: failCommand
255+
mode: {times: 1}
256+
data:
257+
failCommands: ["insert"]
258+
blockConnection: true
259+
blockTimeMS: 500
260+
closeConnection: true
261+
# Execute an insert whose 50 ms timeout is shorter than the 500 ms block time.
262+
- name: insertOne
263+
object: *collection
264+
arguments:
265+
timeoutMS: 50
266+
document: {_id: 3, x: 1}
267+
expectError:
268+
isTimeoutError: true
269+
- name: wait
270+
object: testRunner
271+
arguments:
272+
ms: 500 # Matches blockTimeMS above.
273+
# Execute a subsequent find with a timeout less than the block time.
274+
- name: find
275+
object: *collection
276+
arguments:
277+
timeoutMS: 50
278+
filter: {_id: 1}
279+
expectEvents:
280+
- client: *client
281+
events:
282+
- commandFailedEvent:
283+
commandName: insert
284+
- commandSucceededEvent:
285+
commandName: find
286+
- client: *client
287+
eventType: cmap
288+
events:
289+
- connectionCheckedOutEvent: {}
290+
- connectionCheckedInEvent: {} # Insert fails.
291+
- connectionPendingResponseStarted: {} # Pending read fails on the find's first attempt.
292+
- connectionPendingResponseFailed:
293+
reason: error
294+
- connectionClosedEvent:
295+
reason: error
296+
- connectionCheckedOutEvent: {}
297+
- connectionCheckedInEvent: {} # Find finishes.
298+
# If the connection is closed server-side while draining the response, the
299+
# driver must retry with a different connection.
300+
- description: "write op retries when connection closes server-side while draining response"
301+
operations:
302+
# Create a failpoint that blocks the insert and closes its connection.
303+
- name: failPoint
304+
object: testRunner
305+
arguments:
306+
client: *failPointClient
307+
failPoint:
308+
configureFailPoint: failCommand
309+
mode: {times: 1}
310+
data:
311+
failCommands: ["insert"]
312+
blockConnection: true
313+
blockTimeMS: 500
314+
closeConnection: true
315+
# Execute an insert whose 50 ms timeout is shorter than the 500 ms block time.
316+
- name: insertOne
317+
object: *collection
318+
arguments:
319+
timeoutMS: 50
320+
document: {_id: 3, x: 1}
321+
expectError:
322+
isTimeoutError: true
323+
- name: wait
324+
object: testRunner
325+
arguments:
326+
ms: 500 # Matches blockTimeMS above.
327+
# Execute a subsequent insert with a timeout less than the block time.
328+
- name: insertOne
329+
object: *collection
330+
arguments:
331+
timeoutMS: 50
332+
document: {_id: 3, x: 1}
333+
expectEvents:
334+
- client: *client
335+
events:
336+
- commandFailedEvent:
337+
commandName: insert
338+
- commandSucceededEvent:
339+
commandName: insert
340+
- client: *client
341+
eventType: cmap
342+
events:
343+
- connectionCheckedOutEvent: {}
344+
- connectionCheckedInEvent: {} # First insert fails.
345+
- connectionPendingResponseStarted: {} # Pending read fails on the second insert's first attempt.
346+
- connectionPendingResponseFailed:
347+
reason: error
348+
- connectionClosedEvent:
349+
reason: error
350+
- connectionCheckedOutEvent: {}
351+
- connectionCheckedInEvent: {} # Second insert finishes.

0 commit comments

Comments
 (0)