Skip to content

PYTHON-4924 - PoolClearedError should have TransientTransactionError … #2244

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 8, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions pymongo/asynchronous/pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,10 @@ def _raise_connection_failure(
) -> NoReturn:
"""Convert a socket.error to ConnectionFailure and raise it."""
host, port = address
if isinstance(error, PyMongoError) and error._error_labels:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need any pymongo changes here. We already add the error label to PoolClearedErrors here:

exc_val._add_error_label("TransientTransactionError")

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would we expect the added test to still pass without the code changes then? It fails without these changes.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That test should fail because we only add the TransientTransactionError label if we're actually running a transaction.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it. It sounds like we can go ahead and close this PR and the ticket, then.

Copy link
Member

@ShaneHarvey ShaneHarvey Mar 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you confirm that we have a test for "pool clear error" having the label in a txn? If not, we should add one.

Copy link
Contributor Author

@NoahStapp NoahStapp Mar 31, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Our Start Transactions Retry Example 1 section of TestTransactionExamples in test_examples.py refers to the label, but I don't think it is intended to fail every time and check for the label:

async def run_transaction_with_retry(txn_func, session):
    """Run ``txn_func(session)`` repeatedly until it completes.

    A ConnectionFailure or OperationFailure that carries the
    "TransientTransactionError" label restarts the whole transaction;
    one without the label is re-raised to the caller.
    """
    while True:
        try:
            await txn_func(session)  # performs transaction
        except (ConnectionFailure, OperationFailure) as exc:
            print("Transaction aborted. Caught exception during transaction.")
            # Only transient errors are retried; everything else propagates.
            if not exc.has_error_label("TransientTransactionError"):
                raise
            print("TransientTransactionError, retrying transaction ...")
        else:
            # The transaction function finished without raising.
            break

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We still need the new tests added in this PR.

labels = error._error_labels
else:
labels = None
# If connecting to a Unix socket, port will be None.
if port is not None:
msg = "%s:%d: %s" % (host, port, error)
Expand All @@ -200,15 +204,15 @@ def _raise_connection_failure(
if "configured timeouts" not in msg:
msg += format_timeout_details(timeout_details)
if isinstance(error, socket.timeout):
raise NetworkTimeout(msg) from error
raise NetworkTimeout(msg, errors={"errorLabels": labels}) from error
elif isinstance(error, SSLError) and "timed out" in str(error):
# Eventlet does not distinguish TLS network timeouts from other
# SSLErrors (https://github.com/eventlet/eventlet/issues/692).
# Luckily, we can work around this limitation because the phrase
# 'timed out' appears in all the timeout related SSLErrors raised.
raise NetworkTimeout(msg) from error
raise NetworkTimeout(msg, errors={"errorLabels": labels}) from error
else:
raise AutoReconnect(msg) from error
raise AutoReconnect(msg, errors={"errorLabels": labels}) from error


def _get_timeout_details(options: PoolOptions) -> dict[str, float]:
Expand Down Expand Up @@ -1420,9 +1424,9 @@ def _raise_if_not_ready(self, checkout_started_time: float, emit_event: bool) ->
)

details = _get_timeout_details(self.opts)
_raise_connection_failure(
self.address, AutoReconnect("connection pool paused"), timeout_details=details
)
error = AutoReconnect("connection pool paused")
error._add_error_label("TransientTransactionError")
_raise_connection_failure(self.address, error, timeout_details=details)

async def _get_conn(
self, checkout_started_time: float, handler: Optional[_MongoClientErrorHandler] = None
Expand Down
16 changes: 10 additions & 6 deletions pymongo/synchronous/pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,10 @@ def _raise_connection_failure(
) -> NoReturn:
"""Convert a socket.error to ConnectionFailure and raise it."""
host, port = address
if isinstance(error, PyMongoError) and error._error_labels:
labels = error._error_labels
else:
labels = None
# If connecting to a Unix socket, port will be None.
if port is not None:
msg = "%s:%d: %s" % (host, port, error)
Expand All @@ -200,15 +204,15 @@ def _raise_connection_failure(
if "configured timeouts" not in msg:
msg += format_timeout_details(timeout_details)
if isinstance(error, socket.timeout):
raise NetworkTimeout(msg) from error
raise NetworkTimeout(msg, errors={"errorLabels": labels}) from error
elif isinstance(error, SSLError) and "timed out" in str(error):
# Eventlet does not distinguish TLS network timeouts from other
# SSLErrors (https://github.com/eventlet/eventlet/issues/692).
# Luckily, we can work around this limitation because the phrase
# 'timed out' appears in all the timeout related SSLErrors raised.
raise NetworkTimeout(msg) from error
raise NetworkTimeout(msg, errors={"errorLabels": labels}) from error
else:
raise AutoReconnect(msg) from error
raise AutoReconnect(msg, errors={"errorLabels": labels}) from error


def _get_timeout_details(options: PoolOptions) -> dict[str, float]:
Expand Down Expand Up @@ -1414,9 +1418,9 @@ def _raise_if_not_ready(self, checkout_started_time: float, emit_event: bool) ->
)

details = _get_timeout_details(self.opts)
_raise_connection_failure(
self.address, AutoReconnect("connection pool paused"), timeout_details=details
)
error = AutoReconnect("connection pool paused")
error._add_error_label("TransientTransactionError")
_raise_connection_failure(self.address, error, timeout_details=details)

def _get_conn(
self, checkout_started_time: float, handler: Optional[_MongoClientErrorHandler] = None
Expand Down
17 changes: 16 additions & 1 deletion test/asynchronous/test_pooling.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
from test.asynchronous.helpers import ConcurrentRunner
from test.utils_shared import delay

from pymongo.asynchronous.pool import Pool, PoolOptions
from pymongo.asynchronous.pool import Pool, PoolOptions, PoolState
from pymongo.socket_checker import SocketChecker

_IS_SYNC = False
Expand Down Expand Up @@ -608,6 +608,21 @@ async def test_max_pool_size_with_connection_failure(self):
# seems error-prone, so check the message too.
self.assertNotIn("waiting for socket from pool", str(context.exception))

async def test_pool_cleared_error_labelled_transient(self):
    """Checkout from a paused pool raises an error labelled as transient."""
    address = ("localhost", 27017)
    paused_pool = Pool(address, PoolOptions(max_pool_size=1))
    # Put the pool in the paused state so that connection checkout fails.
    paused_pool.state = PoolState.PAUSED

    with self.assertRaises(AutoReconnect) as raised:
        async with paused_pool.checkout():
            pass

    # The raised error must carry the TransientTransactionError label.
    error = raised.exception
    self.assertTrue(error.has_error_label("TransientTransactionError"))


if __name__ == "__main__":
unittest.main()
17 changes: 16 additions & 1 deletion test/test_pooling.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
from test.utils_shared import delay

from pymongo.socket_checker import SocketChecker
from pymongo.synchronous.pool import Pool, PoolOptions
from pymongo.synchronous.pool import Pool, PoolOptions, PoolState

_IS_SYNC = True

Expand Down Expand Up @@ -606,6 +606,21 @@ def test_max_pool_size_with_connection_failure(self):
# seems error-prone, so check the message too.
self.assertNotIn("waiting for socket from pool", str(context.exception))

def test_pool_cleared_error_labelled_transient(self):
    """Checkout from a paused pool raises an error labelled as transient."""
    address = ("localhost", 27017)
    paused_pool = Pool(address, PoolOptions(max_pool_size=1))
    # Put the pool in the paused state so that connection checkout fails.
    paused_pool.state = PoolState.PAUSED

    with self.assertRaises(AutoReconnect) as raised:
        with paused_pool.checkout():
            pass

    # The raised error must carry the TransientTransactionError label.
    error = raised.exception
    self.assertTrue(error.has_error_label("TransientTransactionError"))


if __name__ == "__main__":
unittest.main()
Loading