diff --git a/src/sessions.ts b/src/sessions.ts index ea209b63d10..f89bc008c0a 100644 --- a/src/sessions.ts +++ b/src/sessions.ts @@ -17,6 +17,7 @@ import { MongoErrorLabel, MongoExpiredSessionError, MongoInvalidArgumentError, + MongoOperationTimeoutError, MongoRuntimeError, MongoServerError, MongoTransactionError, @@ -725,7 +726,7 @@ export class ClientSession timeoutMS?: number; } ): Promise { - const MAX_TIMEOUT = 120000; + const MAX_TIMEOUT = 120_000; const timeoutMS = options?.timeoutMS ?? this.timeoutMS ?? null; this.timeoutContext = @@ -737,10 +738,12 @@ export class ClientSession }) : null; - // 1. Record the current monotonic time, which will be used to enforce the 120-second timeout before later retry attempts. - const startTime = this.timeoutContext?.csotEnabled() // This is strictly to appease TS. We must narrow the context to a CSOT context before accessing `.start`. - ? this.timeoutContext.start - : processTimeMS(); + // 1. Compute the absolute deadline for timeout enforcement. + // 1.3 Set `TIMEOUT_MS` to be `timeoutMS` if given, otherwise MAX_TIMEOUT (120-seconds). + const csotEnabled = !!this.timeoutContext?.csotEnabled(); + const deadline = this.timeoutContext?.csotEnabled() + ? processTimeMS() + this.timeoutContext.remainingTimeMS + : processTimeMS() + MAX_TIMEOUT; let committed = false; let result: T; @@ -774,17 +777,13 @@ export class ClientSession BACKOFF_MAX_MS ); - const willExceedTransactionDeadline = - (this.timeoutContext?.csotEnabled() && - backoffMS > this.timeoutContext.remainingTimeMS) || - processTimeMS() + backoffMS > startTime + MAX_TIMEOUT; - - if (willExceedTransactionDeadline) { - throw ( + if (processTimeMS() + backoffMS >= deadline) { + throw makeTimeoutError( lastError ?? - new MongoRuntimeError( - `Transaction retry did not record an error: should never occur. Please file a bug.` - ) + new MongoRuntimeError( + `Transaction retry did not record an error: should never occur. Please file a bug.` + ), + csotEnabled ); } @@ -827,6 +826,8 @@ export class ClientSession throw fnError; } + lastError = fnError; + if ( this.transaction.state === TxnState.STARTING_TRANSACTION || this.transaction.state === TxnState.TRANSACTION_IN_PROGRESS @@ -836,14 +837,15 @@ export class ClientSession await this.abortTransaction(); } - if ( - fnError.hasErrorLabel(MongoErrorLabel.TransientTransactionError) && - (this.timeoutContext?.csotEnabled() || processTimeMS() - startTime < MAX_TIMEOUT) - ) { - // 7.ii If the callback's error includes a "TransientTransactionError" label and the elapsed time of `withTransaction` - // is less than 120 seconds, jump back to step two. - lastError = fnError; - continue retryTransaction; + if (fnError.hasErrorLabel(MongoErrorLabel.TransientTransactionError)) { + if (processTimeMS() < deadline) { + // 7.ii If the callback's error includes a "TransientTransactionError" label and the elapsed time of `withTransaction` + // is less than TIMEOUT_MS, jump back to step two. + continue retryTransaction; + } else { + // 7.ii (cont.) If timeout has been exceeded, raise the transient error (or wrap in timeout for CSOT). + throw makeTimeoutError(fnError, csotEnabled); + } } // 7.iii If the callback's error includes a "UnknownTransactionCommitResult" label, the callback must have manually committed a transaction, @@ -865,37 +867,32 @@ export class ClientSession committed = true; // 10. If commitTransaction reported an error: } catch (commitError) { - // If CSOT is enabled, we repeatedly retry until timeoutMS expires. This is enforced by providing a - // timeoutContext to each async API, which know how to cancel themselves (i.e., the next retry will - // abort the withTransaction call). - // If CSOT is not enabled, do we still have time remaining or have we timed out? - const hasTimedOut = - !this.timeoutContext?.csotEnabled() && processTimeMS() - startTime >= MAX_TIMEOUT; - - if (!hasTimedOut) { - /* - * Note: a maxTimeMS error will have the MaxTimeMSExpired - * code (50) and can be reported as a top-level error or - * inside writeConcernError, ex. - * { ok:0, code: 50, codeName: 'MaxTimeMSExpired' } - * { ok:1, writeConcernError: { code: 50, codeName: 'MaxTimeMSExpired' } } - */ - if ( - !isMaxTimeMSExpiredError(commitError) && - commitError.hasErrorLabel(MongoErrorLabel.UnknownTransactionCommitResult) - ) { - // 10.i If the `commitTransaction` error includes a "UnknownTransactionCommitResult" label and the error is not - // MaxTimeMSExpired and the elapsed time of `withTransaction` is less than 120 seconds, jump back to step eight. - continue retryCommit; - } - - if (commitError.hasErrorLabel(MongoErrorLabel.TransientTransactionError)) { - // 10.ii If the commitTransaction error includes a "TransientTransactionError" label - // and the elapsed time of withTransaction is less than 120 seconds, jump back to step two. - lastError = commitError; - - continue retryTransaction; - } + lastError = commitError; + + if (processTimeMS() >= deadline) { + throw makeTimeoutError(commitError, csotEnabled); + } + + /* + * Note: a maxTimeMS error will have the MaxTimeMSExpired + * code (50) and can be reported as a top-level error or + * inside writeConcernError, ex. + * { ok:0, code: 50, codeName: 'MaxTimeMSExpired' } + * { ok:1, writeConcernError: { code: 50, codeName: 'MaxTimeMSExpired' } } + */ + if ( + !isMaxTimeMSExpiredError(commitError) && + commitError.hasErrorLabel(MongoErrorLabel.UnknownTransactionCommitResult) + ) { + // 10.i If the `commitTransaction` error includes a "UnknownTransactionCommitResult" label and the error is not + // MaxTimeMSExpired and the elapsed time of `withTransaction` is less than TIMEOUT_MS, jump back to step eight. + continue retryCommit; + } + + if (commitError.hasErrorLabel(MongoErrorLabel.TransientTransactionError)) { + // 10.ii If the commitTransaction error includes a "TransientTransactionError" label + // and the elapsed time of withTransaction is less than TIMEOUT_MS, jump back to step two. + continue retryTransaction; } // 10.iii Otherwise, propagate the commitTransaction error to the caller of withTransaction and return immediately. @@ -912,6 +909,21 @@ export class ClientSession } } +function makeTimeoutError(cause: Error, csotEnabled: boolean): Error { + if (csotEnabled) { + const timeoutError = new MongoOperationTimeoutError('Timed out during withTransaction', { + cause + }); + if (cause instanceof MongoError) { + for (const label of cause.errorLabels) { + timeoutError.addErrorLabel(label); + } + } + return timeoutError; + } + return cause; +} + const NON_DETERMINISTIC_WRITE_CONCERN_ERRORS = new Set([ 'CannotSatisfyWriteConcern', 'UnknownReplWriteConcern', diff --git a/test/integration/transactions-convenient-api/transactions-convenient-api.prose.test.ts b/test/integration/transactions-convenient-api/transactions-convenient-api.prose.test.ts index 1f4e67687de..6e304eab4e7 100644 --- a/test/integration/transactions-convenient-api/transactions-convenient-api.prose.test.ts +++ b/test/integration/transactions-convenient-api/transactions-convenient-api.prose.test.ts @@ -2,8 +2,13 @@ import { expect } from 'chai'; import { test } from 'mocha'; import * as sinon from 'sinon'; -import { type ClientSession, type Collection, type MongoClient } from '../../mongodb'; -import { configureFailPoint, type FailCommandFailPoint, measureDuration } from '../../tools/utils'; +import { type ClientSession, type Collection, type MongoClient, MongoError } from '../../mongodb'; +import { + clearFailPoint, + configureFailPoint, + type FailCommandFailPoint, + measureDuration +} from '../../tools/utils'; const failCommand: FailCommandFailPoint = { configureFailPoint: 'failCommand', @@ -85,3 +90,158 @@ describe('Retry Backoff is Enforced', function () { } ); }); + +describe('Retry Timeout is Enforced', function () { + // Drivers should test that withTransaction enforces a non-configurable timeout before retrying + // both commits and entire transactions. + // + // We stub performance.now() to simulate elapsed time exceeding the 120-second retry limit, + // as recommended by the spec: "This might be done by internally modifying the timeout value + // used by withTransaction with some private API or using a mock timer." + // + // Without CSOT, the original error is propagated directly. + // With CSOT, the error is wrapped in a MongoOperationTimeoutError. + + let client: MongoClient; + let collection: Collection; + let timeOffset: number; + + beforeEach(async function () { + client = this.configuration.newClient(); + collection = client.db('foo').collection('bar'); + + timeOffset = 0; + const originalNow = performance.now.bind(performance); + sinon.stub(performance, 'now').callsFake(() => originalNow() + timeOffset); + }); + + afterEach(async function () { + sinon.restore(); + await clearFailPoint(this.configuration); + await client?.close(); + }); + + // Case 1: If the callback raises an error with the TransientTransactionError label and the retry + // timeout has been exceeded, withTransaction should propagate the error to its caller. + test( + 'callback TransientTransactionError propagated when retry timeout exceeded', + { + requires: { + mongodb: '>=4.4', + topology: '!single' + } + }, + async function () { + // 1. Configure a failpoint that fails insert with TransientTransactionError. + await configureFailPoint(this.configuration, { + configureFailPoint: 'failCommand', + mode: { times: 1 }, + data: { + failCommands: ['insert'], + errorCode: 24, + errorLabels: ['TransientTransactionError'] + } + }); + + // 2. Run withTransaction. The callback advances the clock past the 120-second retry + // limit before the insert fails, so the timeout is detected immediately. + const { result } = await measureDuration(() => { + return client.withSession(async s => { + await s.withTransaction(async session => { + timeOffset = 120_000; + await collection.insertOne({}, { session }); + }); + }); + }); + + // 3. Assert that the error is the original TransientTransactionError (propagated directly + // in the legacy non-CSOT path). + expect(result).to.be.instanceOf(MongoError); + expect((result as MongoError).hasErrorLabel('TransientTransactionError')).to.be.true; + } + ); + + // Case 2: If committing raises an error with the UnknownTransactionCommitResult label, and the + // retry timeout has been exceeded, withTransaction should propagate the error to + // its caller. + test( + 'commit UnknownTransactionCommitResult propagated when retry timeout exceeded', + { + requires: { + mongodb: '>=4.4', + topology: '!single' + } + }, + async function () { + // 1. Configure a failpoint that fails commitTransaction with UnknownTransactionCommitResult. + await configureFailPoint(this.configuration, { + configureFailPoint: 'failCommand', + mode: { times: 1 }, + data: { + failCommands: ['commitTransaction'], + errorCode: 64, + errorLabels: ['UnknownTransactionCommitResult'] + } + }); + + // 2. Run withTransaction. The callback advances the clock past the 120-second retry + // limit. The insert succeeds, but the commit fails and the timeout is detected. + const { result } = await measureDuration(() => { + return client.withSession(async s => { + await s.withTransaction(async session => { + timeOffset = 120_000; + await collection.insertOne({}, { session }); + }); + }); + }); + + // 3. Assert that the error is the original commit error (propagated directly + // in the legacy non-CSOT path). + expect(result).to.be.instanceOf(MongoError); + expect((result as MongoError).hasErrorLabel('UnknownTransactionCommitResult')).to.be.true; + } + ); + + // Case 3: If committing raises an error with the TransientTransactionError label and the retry + // timeout has been exceeded, withTransaction should propagate the error to its + // caller. This case may occur if the commit was internally retried against a new primary after a + // failover and the second primary returned a NoSuchTransaction error response. + test( + 'commit TransientTransactionError propagated when retry timeout exceeded', + { + requires: { + mongodb: '>=4.4', + topology: '!single' + } + }, + async function () { + // 1. Configure a failpoint that fails commitTransaction with TransientTransactionError + // (errorCode 251 = NoSuchTransaction). + await configureFailPoint(this.configuration, { + configureFailPoint: 'failCommand', + mode: { times: 1 }, + data: { + failCommands: ['commitTransaction'], + errorCode: 251, + errorLabels: ['TransientTransactionError'] + } + }); + + // 2. Run withTransaction. The callback advances the clock past the 120-second retry + // limit. The insert succeeds, but the commit fails and the timeout is detected. + const { result } = await measureDuration(() => { + return client.withSession(async s => { + await s.withTransaction(async session => { + timeOffset = 120_000; + await collection.insertOne({}, { session }); + }); + }); + }); + + // 3. Assert that the error is the original commit error (propagated directly + // in the legacy non-CSOT path). + expect(result).to.be.instanceOf(MongoError); + expect((result as MongoError).hasErrorLabel('TransientTransactionError')).to.be.true; + } + ); +}); diff --git a/test/spec/client-side-operations-timeout/convenient-transactions.json b/test/spec/client-side-operations-timeout/convenient-transactions.json index f9d03429db9..3400b82ba92 100644 --- a/test/spec/client-side-operations-timeout/convenient-transactions.json +++ b/test/spec/client-side-operations-timeout/convenient-transactions.json @@ -27,7 +27,8 @@ "awaitMinPoolSizeMS": 10000, "useMultipleMongoses": false, "observeEvents": [ - "commandStartedEvent" + "commandStartedEvent", + "commandFailedEvent" ] } }, @@ -188,6 +189,11 @@ } } }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, { "commandStartedEvent": { "commandName": "abortTransaction", @@ -206,6 +212,105 @@ ] } ] + }, + { + "description": "withTransaction surfaces a timeout after exhausting transient transaction retries, retaining the last transient error as the timeout cause.", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "failPointClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "insert" + ], + "blockConnection": true, + "blockTimeMS": 25, + "errorCode": 24, + "errorLabels": [ + "TransientTransactionError" + ] + } + } + } + }, + { + "name": "withTransaction", + "object": "session", + "arguments": { + "callback": [ + { + "name": "insertOne", + "object": "collection", + "arguments": { + "document": { + "_id": 1 + }, + "session": "session" + }, + "expectError": { + "isError": true + } + } + ] + }, + "expectError": { + "isTimeoutError": true + } + } + ], + "expectEvents": [ + { + "client": "client", + "ignoreExtraEvents": true, + "events": [ + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "abortTransaction" + } + }, + { + "commandFailedEvent": { + "commandName": "abortTransaction" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "abortTransaction" + } + }, + { + "commandFailedEvent": { + "commandName": "abortTransaction" + } + } + ] + } + ] } ] } diff --git a/test/spec/client-side-operations-timeout/convenient-transactions.yml b/test/spec/client-side-operations-timeout/convenient-transactions.yml index 55b72481dfb..8157c5e4d85 100644 --- a/test/spec/client-side-operations-timeout/convenient-transactions.yml +++ b/test/spec/client-side-operations-timeout/convenient-transactions.yml @@ -19,6 +19,7 @@ createEntities: useMultipleMongoses: false observeEvents: - commandStartedEvent + - commandFailedEvent - database: id: &database database client: *client @@ -104,9 +105,73 @@ tests: command: insert: *collectionName maxTimeMS: { $$type: ["int", "long"] } + - commandFailedEvent: + commandName: insert - commandStartedEvent: commandName: abortTransaction databaseName: admin command: abortTransaction: 1 maxTimeMS: { $$type: [ "int", "long" ] } + + # This test verifies that when withTransaction encounters transient transaction errors it does not + # throw the transient transaction error when the timeout is exceeded, but instead surfaces a timeout error after + # exhausting the retry attempts within the specified timeout. + # The timeout error thrown contains as a cause the last transient error encountered. + - description: "withTransaction surfaces a timeout after exhausting transient transaction retries, retaining the last transient error as the timeout cause." + operations: + - name: failPoint + object: testRunner + arguments: + client: *failPointClient + failPoint: + configureFailPoint: failCommand + mode: alwaysOn + data: + failCommands: ["insert"] + blockConnection: true + blockTimeMS: 25 + errorCode: 24 + errorLabels: ["TransientTransactionError"] + + - name: withTransaction + object: *session + arguments: + callback: + - name: insertOne + object: *collection + arguments: + document: { _id: 1 } + session: *session + expectError: + isError: true + expectError: + isTimeoutError: true + + # Verify that multiple insert (at least 2) attempts occurred due to TransientTransactionError retries + # The exact number of events depends on timing and retry backoff, but there should be at least: + # - 2 commandStartedEvent for insert (initial + at least one retry) + # - 2 commandFailedEvent for insert (corresponding failures) + expectEvents: + - client: *client + ignoreExtraEvents: true + events: + # First insert attempt + - commandStartedEvent: + commandName: insert + - commandFailedEvent: + commandName: insert + - commandStartedEvent: + commandName: abortTransaction + - commandFailedEvent: + commandName: abortTransaction + + # Second insert attempt (retry due to TransientTransactionError) + - commandStartedEvent: + commandName: insert + - commandFailedEvent: + commandName: insert + - commandStartedEvent: + commandName: abortTransaction + - commandFailedEvent: + commandName: abortTransaction