Skip to content

MONGOCRYPT-763 add in-place retry API #1011

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions integrating.md
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,8 @@ Ensure `mongocrypt_setopt_retry_kms` is called on the `mongocrypt_t` to enable r
d. Feed the reply back with `mongocrypt_kms_ctx_feed`. Repeat
> until `mongocrypt_kms_ctx_bytes_needed` returns 0.

If any step encounters a network error, call `mongocrypt_kms_ctx_fail`.
If `mongocrypt_kms_ctx_fail` returns true, continue to the next KMS context.
If any step encounters a network error, or if `mongocrypt_kms_ctx_should_retry` returns true after feeding the reply, call `mongocrypt_kms_ctx_fail`.
If `mongocrypt_kms_ctx_fail` returns true, retry the request by continuing to the next KMS context or by feeding the new response into the same context.
If `mongocrypt_kms_ctx_fail` returns false, abort and report an error. Consider wrapping the error reported in `mongocrypt_kms_ctx_status` to include the last network error.

2. When done feeding all replies, call `mongocrypt_ctx_kms_done`.
Expand Down
44 changes: 27 additions & 17 deletions src/mongocrypt-kms-ctx.c
Original file line number Diff line number Diff line change
Expand Up @@ -1120,6 +1120,28 @@ static bool _ctx_done_kmip_decrypt(mongocrypt_kms_ctx_t *kms_ctx) {
return ret;
}

/* Report whether a KMS request of type `req_type` may be retried.
 *
 * Only the request types enumerated below are treated as retryable; every
 * other type yields false. Some requests are non-idempotent and cannot be
 * safely retried. */
static bool _is_retryable_req(_kms_request_type_t req_type) {
    switch (req_type) {
    case MONGOCRYPT_KMS_AZURE_OAUTH:
    case MONGOCRYPT_KMS_GCP_OAUTH:
    case MONGOCRYPT_KMS_AWS_ENCRYPT:
    case MONGOCRYPT_KMS_AWS_DECRYPT:
    case MONGOCRYPT_KMS_AZURE_WRAPKEY:
    case MONGOCRYPT_KMS_AZURE_UNWRAPKEY:
    case MONGOCRYPT_KMS_GCP_ENCRYPT:
    case MONGOCRYPT_KMS_GCP_DECRYPT: return true;
    default: return false;
    }
}

/* Report the `should_retry` flag of a KMS context.
 * A NULL `kms` is tolerated and reported as "no retry". */
bool mongocrypt_kms_ctx_should_retry(mongocrypt_kms_ctx_t *kms) {
    if (!kms) {
        return false;
    }
    return kms->should_retry ? true : false;
}

bool mongocrypt_kms_ctx_fail(mongocrypt_kms_ctx_t *kms) {
if (!kms) {
return false;
Expand All @@ -1138,23 +1160,7 @@ bool mongocrypt_kms_ctx_fail(mongocrypt_kms_ctx_t *kms) {
return false;
}

// Check if request type is retryable. Some requests are non-idempotent and cannot be safely retried.
_kms_request_type_t retryable_types[] = {MONGOCRYPT_KMS_AZURE_OAUTH,
MONGOCRYPT_KMS_GCP_OAUTH,
MONGOCRYPT_KMS_AWS_ENCRYPT,
MONGOCRYPT_KMS_AWS_DECRYPT,
MONGOCRYPT_KMS_AZURE_WRAPKEY,
MONGOCRYPT_KMS_AZURE_UNWRAPKEY,
MONGOCRYPT_KMS_GCP_ENCRYPT,
MONGOCRYPT_KMS_GCP_DECRYPT};
bool is_retryable = false;
for (size_t i = 0; i < sizeof(retryable_types) / sizeof(retryable_types[0]); i++) {
if (retryable_types[i] == kms->req_type) {
is_retryable = true;
break;
}
}
if (!is_retryable) {
if (!_is_retryable_req(kms->req_type)) {
CLIENT_ERR("KMS request failed due to network error");
return false;
}
Expand All @@ -1178,6 +1184,10 @@ bool mongocrypt_kms_ctx_feed(mongocrypt_kms_ctx_t *kms, mongocrypt_binary_t *byt
if (!mongocrypt_status_ok(status)) {
return false;
}
if (kms->should_retry) {
// This happens when a KMS context is reused in-place
kms->should_retry = false;
}

if (!bytes) {
CLIENT_ERR("argument 'bytes' is required");
Expand Down
12 changes: 11 additions & 1 deletion src/mongocrypt.h
Original file line number Diff line number Diff line change
Expand Up @@ -1180,14 +1180,24 @@ MONGOCRYPT_EXPORT
bool mongocrypt_kms_ctx_feed(mongocrypt_kms_ctx_t *kms, mongocrypt_binary_t *bytes);

/**
* Indicate a network-level failure.
* Indicate a failure. Discards all data fed to this KMS context with @ref mongocrypt_kms_ctx_feed.
* The @ref mongocrypt_kms_ctx_t may be reused.
*
* @param[in] kms The @ref mongocrypt_kms_ctx_t.
* @return A boolean indicating whether the failed request may be retried.
*/
MONGOCRYPT_EXPORT
bool mongocrypt_kms_ctx_fail(mongocrypt_kms_ctx_t *kms);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mongocrypt_kms_ctx_fail currently still refers to "network error" in error messages and increments the retry attempts:

ASSERT_OK(mongocrypt_kms_ctx_feed(kms, retryable_http), kms); // Increments kms->attempts.
ASSERT(mongocrypt_kms_ctx_should_retry(kms));
ASSERT_OK(mongocrypt_kms_ctx_fail(kms), kms); // Increments kms->attempts again.

I like the direction towards simplifying. Suggest resetting the KMS parser in set_retry. I expect this will reset the parser for both HTTP and network errors and could further simplify the retry pattern:

while (true) {
    try {
        // TODO: Write and read from socket.
    } catch (e : NetworkError) {
        // Indicate network error:
        mongocrypt_kms_ctx_fail (kms);
    } finally {
        if (mongocrypt_kms_ctx_should_retry (kms)) {
            // Can retry due to a HTTP or network error.
            continue;
        }
    }
}

That way, mongocrypt_kms_ctx_fail does not change its meaning (still means only network error).

Testing this change in 456d523 appears to work.


/**
* Indicate if a KMS context is completed but should be retried.
*
* @param[in] kms The @ref mongocrypt_kms_ctx_t.
* @return A boolean indicating whether the failed request should be retried.
*/
MONGOCRYPT_EXPORT
bool mongocrypt_kms_ctx_should_retry(mongocrypt_kms_ctx_t *kms);

/**
* Get the status associated with a @ref mongocrypt_kms_ctx_t object.
*
Expand Down
7 changes: 7 additions & 0 deletions test/data/kms-aws/encrypt-response-partial.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
HTTP/1.1 200 OK
x-amzn-RequestId: deeb35e5-4ecb-4bf1-9af5-84a54ff0af0e
Content-Type: application/x-amz-json-1.1
Content-Length: 446
Connection: close

{"KeyId": "arn:aws:k
51 changes: 51 additions & 0 deletions test/test-mongocrypt-datakey.c
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,33 @@ static void _test_create_datakey_with_retry(_mongocrypt_tester_t *tester) {
mongocrypt_destroy(crypt);
}

// Test that an HTTP error is retried in-place.
{
mongocrypt_t *crypt = _mongocrypt_tester_mongocrypt(TESTER_MONGOCRYPT_DEFAULT);
mongocrypt_ctx_t *ctx = mongocrypt_ctx_new(crypt);
ASSERT_OK(
mongocrypt_ctx_setopt_key_encryption_key(ctx,
TEST_BSON("{'provider': 'aws', 'key': 'foo', 'region': 'bar'}")),
ctx);
ASSERT_OK(mongocrypt_ctx_datakey_init(ctx), ctx);
ASSERT_STATE_EQUAL(mongocrypt_ctx_state(ctx), MONGOCRYPT_CTX_NEED_KMS);
mongocrypt_kms_ctx_t *kms_ctx = mongocrypt_ctx_next_kms_ctx(ctx);
ASSERT_OK(kms_ctx, ctx);
// Expect no sleep is requested before any error.
ASSERT_CMPINT64(mongocrypt_kms_ctx_usleep(kms_ctx), ==, 0);
// Feed a retryable HTTP error.
ASSERT_OK(mongocrypt_kms_ctx_feed(kms_ctx, TEST_FILE("./test/data/rmd/kms-decrypt-reply-429.txt")), kms_ctx);
// In-place retry is indicated.
ASSERT(mongocrypt_kms_ctx_should_retry(kms_ctx));
ASSERT(mongocrypt_kms_ctx_fail(kms_ctx));
// Feed a successful response.
ASSERT_OK(mongocrypt_kms_ctx_feed(kms_ctx, TEST_FILE("./test/data/kms-aws/encrypt-response.txt")), kms_ctx);
ASSERT_OK(mongocrypt_ctx_kms_done(ctx), ctx);
_mongocrypt_tester_run_ctx_to(tester, ctx, MONGOCRYPT_CTX_DONE);
mongocrypt_ctx_destroy(ctx);
mongocrypt_destroy(crypt);
}

// Test that a network error is retried.
{
mongocrypt_t *crypt = _mongocrypt_tester_mongocrypt(TESTER_MONGOCRYPT_DEFAULT);
Expand Down Expand Up @@ -454,6 +481,30 @@ static void _test_create_datakey_with_retry(_mongocrypt_tester_t *tester) {
mongocrypt_destroy(crypt);
}

// Test that a network error is retried in-place.
{
mongocrypt_t *crypt = _mongocrypt_tester_mongocrypt(TESTER_MONGOCRYPT_DEFAULT);
mongocrypt_ctx_t *ctx = mongocrypt_ctx_new(crypt);
ASSERT_OK(
mongocrypt_ctx_setopt_key_encryption_key(ctx,
TEST_BSON("{'provider': 'aws', 'key': 'foo', 'region': 'bar'}")),
ctx);
ASSERT_OK(mongocrypt_ctx_datakey_init(ctx), ctx);
ASSERT_STATE_EQUAL(mongocrypt_ctx_state(ctx), MONGOCRYPT_CTX_NEED_KMS);
mongocrypt_kms_ctx_t *kms_ctx = mongocrypt_ctx_next_kms_ctx(ctx);
ASSERT_OK(kms_ctx, ctx);
// Expect no sleep is requested before any error.
ASSERT_CMPINT64(mongocrypt_kms_ctx_usleep(kms_ctx), ==, 0);
// Mark a network error.
ASSERT_OK(mongocrypt_kms_ctx_fail(kms_ctx), kms_ctx);
// Feed a successful response.
ASSERT_OK(mongocrypt_kms_ctx_feed(kms_ctx, TEST_FILE("./test/data/kms-aws/encrypt-response.txt")), kms_ctx);
ASSERT_OK(mongocrypt_ctx_kms_done(ctx), ctx);
_mongocrypt_tester_run_ctx_to(tester, ctx, MONGOCRYPT_CTX_DONE);
mongocrypt_ctx_destroy(ctx);
mongocrypt_destroy(crypt);
}

// Test that an oauth request is retried for a network error.
{
mongocrypt_t *crypt = _mongocrypt_tester_mongocrypt(TESTER_MONGOCRYPT_DEFAULT);
Expand Down