|
19 | 19 | #include <aws/core/utils/logging/ErrorMacros.h> |
20 | 20 |
|
21 | 21 | #include <Poco/Net/NetException.h> |
| 22 | +#include <Poco/Exception.h> |
22 | 23 |
|
23 | 24 | #include <IO/Expect404ResponseScope.h> |
24 | 25 | #include <IO/S3/Requests.h> |
@@ -493,37 +494,37 @@ Model::HeadObjectOutcome Client::HeadObject(HeadObjectRequest & request) const |
493 | 494 | Model::ListObjectsV2Outcome Client::ListObjectsV2(ListObjectsV2Request & request) const |
494 | 495 | { |
495 | 496 | return doRequestWithRetryNetworkErrors</*IsReadMethod*/ true>( |
496 | | - request, [this](const Model::ListObjectsV2Request & req) { return ListObjectsV2(req); }); |
| 497 | + request, [this](Model::ListObjectsV2Request & req) { return ListObjectsV2(req); }); |
497 | 498 | } |
498 | 499 |
|
499 | 500 | Model::ListObjectsOutcome Client::ListObjects(ListObjectsRequest & request) const |
500 | 501 | { |
501 | 502 | return doRequestWithRetryNetworkErrors</*IsReadMethod*/ true>( |
502 | | - request, [this](const Model::ListObjectsRequest & req) { return ListObjects(req); }); |
| 503 | + request, [this](Model::ListObjectsRequest & req) { return ListObjects(req); }); |
503 | 504 | } |
504 | 505 |
|
505 | 506 | Model::GetObjectOutcome Client::GetObject(GetObjectRequest & request) const |
506 | 507 | { |
507 | 508 | return processRequestResult( |
508 | | - doRequest(request, [this](const Model::GetObjectRequest & req) { return GetObject(req); })); |
| 509 | + doRequest(request, [this](Model::GetObjectRequest & req) { return GetObject(req); })); |
509 | 510 | } |
510 | 511 |
|
511 | 512 | Model::AbortMultipartUploadOutcome Client::AbortMultipartUpload(AbortMultipartUploadRequest & request) const |
512 | 513 | { |
513 | 514 | return doRequestWithRetryNetworkErrors</*IsReadMethod*/ false>( |
514 | | - request, [this](const Model::AbortMultipartUploadRequest & req) { return AbortMultipartUpload(req); }); |
| 515 | + request, [this](Model::AbortMultipartUploadRequest & req) { return AbortMultipartUpload(req); }); |
515 | 516 | } |
516 | 517 |
|
517 | 518 | Model::CreateMultipartUploadOutcome Client::CreateMultipartUpload(CreateMultipartUploadRequest & request) const |
518 | 519 | { |
519 | 520 | return doRequestWithRetryNetworkErrors</*IsReadMethod*/ false>( |
520 | | - request, [this](const Model::CreateMultipartUploadRequest & req) { return CreateMultipartUpload(req); }); |
| 521 | + request, [this](Model::CreateMultipartUploadRequest & req) { return CreateMultipartUpload(req); }); |
521 | 522 | } |
522 | 523 |
|
523 | 524 | Model::CompleteMultipartUploadOutcome Client::CompleteMultipartUpload(CompleteMultipartUploadRequest & request) const |
524 | 525 | { |
525 | 526 | auto outcome = doRequestWithRetryNetworkErrors</*IsReadMethod*/ false>( |
526 | | - request, [this](const Model::CompleteMultipartUploadRequest & req) { return CompleteMultipartUpload(req); }); |
| 527 | + request, [this](Model::CompleteMultipartUploadRequest & req) { return CompleteMultipartUpload(req); }); |
527 | 528 |
|
528 | 529 | const auto & key = request.GetKey(); |
529 | 530 | const auto & bucket = request.GetBucket(); |
@@ -570,42 +571,42 @@ Model::CompleteMultipartUploadOutcome Client::CompleteMultipartUpload(CompleteMu |
570 | 571 | Model::CopyObjectOutcome Client::CopyObject(CopyObjectRequest & request) const |
571 | 572 | { |
572 | 573 | return doRequestWithRetryNetworkErrors</*IsReadMethod*/ false>( |
573 | | - request, [this](const Model::CopyObjectRequest & req) { return CopyObject(req); }); |
| 574 | + request, [this](Model::CopyObjectRequest & req) { return CopyObject(req); }); |
574 | 575 | } |
575 | 576 |
|
576 | 577 | Model::PutObjectOutcome Client::PutObject(PutObjectRequest & request) const |
577 | 578 | { |
578 | 579 | return doRequestWithRetryNetworkErrors</*IsReadMethod*/ false>( |
579 | | - request, [this](const Model::PutObjectRequest & req) { return PutObject(req); }); |
| 580 | + request, [this](Model::PutObjectRequest & req) { return PutObject(req); }); |
580 | 581 | } |
581 | 582 |
|
582 | 583 | Model::UploadPartOutcome Client::UploadPart(UploadPartRequest & request) const |
583 | 584 | { |
584 | 585 | return doRequestWithRetryNetworkErrors</*IsReadMethod*/ false>( |
585 | | - request, [this](const Model::UploadPartRequest & req) { return UploadPart(req); }); |
| 586 | + request, [this](Model::UploadPartRequest & req) { return UploadPart(req); }); |
586 | 587 | } |
587 | 588 |
|
588 | 589 | Model::UploadPartCopyOutcome Client::UploadPartCopy(UploadPartCopyRequest & request) const |
589 | 590 | { |
590 | 591 | return doRequestWithRetryNetworkErrors</*IsReadMethod*/ false>( |
591 | | - request, [this](const Model::UploadPartCopyRequest & req) { return UploadPartCopy(req); }); |
| 592 | + request, [this](Model::UploadPartCopyRequest & req) { return UploadPartCopy(req); }); |
592 | 593 | } |
593 | 594 |
|
594 | 595 | Model::DeleteObjectOutcome Client::DeleteObject(DeleteObjectRequest & request) const |
595 | 596 | { |
596 | 597 | return doRequestWithRetryNetworkErrors</*IsReadMethod*/ false>( |
597 | | - request, [this](const Model::DeleteObjectRequest & req) { Expect404ResponseScope scope; return DeleteObject(req); }); |
| 598 | + request, [this](Model::DeleteObjectRequest & req) { Expect404ResponseScope scope; return DeleteObject(req); }); |
598 | 599 | } |
599 | 600 |
|
600 | 601 | Model::DeleteObjectsOutcome Client::DeleteObjects(DeleteObjectsRequest & request) const |
601 | 602 | { |
602 | 603 | return doRequestWithRetryNetworkErrors</*IsReadMethod*/ false>( |
603 | | - request, [this](const Model::DeleteObjectsRequest & req) { Expect404ResponseScope scope; return DeleteObjects(req); }); |
| 604 | + request, [this](Model::DeleteObjectsRequest & req) { Expect404ResponseScope scope; return DeleteObjects(req); }); |
604 | 605 | } |
605 | 606 |
|
606 | 607 | Client::ComposeObjectOutcome Client::ComposeObject(ComposeObjectRequest & request) const |
607 | 608 | { |
608 | | - auto request_fn = [this](const ComposeObjectRequest & req) |
| 609 | + auto request_fn = [this](ComposeObjectRequest & req) |
609 | 610 | { |
610 | 611 | auto & endpoint_provider = const_cast<Client &>(*this).accessEndpointProvider(); |
611 | 612 | AWS_OPERATION_CHECK_PTR(endpoint_provider, ComposeObject, Aws::Client::CoreErrors, Aws::Client::CoreErrors::ENDPOINT_RESOLUTION_FAILURE); |
@@ -634,7 +635,7 @@ Client::ComposeObjectOutcome Client::ComposeObject(ComposeObjectRequest & reques |
634 | 635 | } |
635 | 636 |
|
636 | 637 | template <typename RequestType, typename RequestFn> |
637 | | -std::invoke_result_t<RequestFn, RequestType> |
| 638 | +std::invoke_result_t<RequestFn, RequestType &> |
638 | 639 | Client::doRequest(RequestType & request, RequestFn request_fn) const |
639 | 640 | { |
640 | 641 | addAdditionalAMZHeadersToCanonicalHeadersList(request, client_configuration.extra_headers); |
@@ -729,81 +730,100 @@ Client::doRequest(RequestType & request, RequestFn request_fn) const |
729 | 730 | } |
730 | 731 |
|
731 | 732 | template <bool IsReadMethod, typename RequestType, typename RequestFn> |
732 | | -std::invoke_result_t<RequestFn, RequestType> |
| 733 | +std::invoke_result_t<RequestFn, RequestType &> |
733 | 734 | Client::doRequestWithRetryNetworkErrors(RequestType & request, RequestFn request_fn) const |
734 | 735 | { |
| 736 | + /// S3 does retries network errors actually. |
| 737 | + /// But it does matter when errors occur. |
| 738 | + /// This code retries a specific case when |
| 739 | + /// network error happens when XML document is being read from the response body. |
| 740 | + /// Hence, the response body is a stream, network errors are possible at reading. |
| 741 | + /// S3 doesn't retry them. |
| 742 | + |
| 743 | + /// Not all requests can be retried in that way. |
| 744 | + /// Requests that read out response body to build the result are possible to retry. |
| 745 | + /// Requests that expose the response stream as an answer are not retried with that code. E.g. GetObject. |
| 746 | + |
735 | 747 | addAdditionalAMZHeadersToCanonicalHeadersList(request, client_configuration.extra_headers); |
736 | | - auto with_retries = [this, request_fn_ = std::move(request_fn)] (const RequestType & request_) |
| 748 | + auto with_retries = [this, request_fn_ = std::move(request_fn)] (RequestType & request_) |
737 | 749 | { |
738 | 750 | chassert(client_configuration.retryStrategy); |
739 | 751 | const Int64 max_attempts = client_configuration.retry_strategy.max_retries + 1; |
740 | | - chassert(max_attempts > 0); |
741 | | - std::exception_ptr last_exception = nullptr; |
742 | | - for (Int64 attempt_no = 0; attempt_no < max_attempts; ++attempt_no) |
| 752 | + |
| 753 | + Int64 attempt_no = 1; |
| 754 | + std::invoke_result_t<RequestFn, RequestType &> outcome; |
| 755 | + |
| 756 | + auto net_exception_handler = [&]() -> bool /// return true if we should retry |
| 757 | + { |
| 758 | + incrementProfileEvents<IsReadMethod>(ProfileEvents::S3ReadRequestsErrors, ProfileEvents::S3WriteRequestsErrors); |
| 759 | + if (isClientForDisk()) |
| 760 | + incrementProfileEvents<IsReadMethod>(ProfileEvents::DiskS3ReadRequestsErrors, ProfileEvents::DiskS3WriteRequestsErrors); |
| 761 | + |
| 762 | + tryLogCurrentException(log, fmt::format("Network error on S3 request, attempt {} of {}", attempt_no, max_attempts)); |
| 763 | + |
| 764 | + outcome = Aws::Client::AWSError<Aws::Client::CoreErrors>( |
| 765 | + Aws::Client::CoreErrors::NETWORK_CONNECTION, |
| 766 | + /*name*/ "", |
| 767 | + /*message*/ fmt::format("All {} retry attempts failed. Last exception: {}", max_attempts, getCurrentExceptionMessage(false)), |
| 768 | + /*retryable*/ true); |
| 769 | + |
| 770 | + // network exceptions are always retryable, we could just return true here |
| 771 | + // but we have to check cancellation points for query, ShouldRetry method does it already |
| 772 | + return client_configuration.retryStrategy->ShouldRetry(outcome.GetError(), /*attemptedRetries*/ -1); |
| 773 | + }; |
| 774 | + |
| 775 | + for (attempt_no = 1; attempt_no <= max_attempts; ++attempt_no) |
743 | 776 | { |
744 | 777 | incrementProfileEvents<IsReadMethod>(ProfileEvents::S3ReadRequestAttempts, ProfileEvents::S3WriteRequestAttempts); |
745 | 778 | if (isClientForDisk()) |
746 | 779 | incrementProfileEvents<IsReadMethod>(ProfileEvents::DiskS3ReadRequestAttempts, ProfileEvents::DiskS3WriteRequestAttempts); |
747 | 780 |
|
| 781 | + if (attempt_no > 1) |
| 782 | + { |
| 783 | + incrementProfileEvents<IsReadMethod>(ProfileEvents::S3ReadRequestRetryableErrors, ProfileEvents::S3WriteRequestRetryableErrors); |
| 784 | + if (isClientForDisk()) |
| 785 | + incrementProfileEvents<IsReadMethod>(ProfileEvents::DiskS3ReadRequestRetryableErrors, ProfileEvents::DiskS3WriteRequestRetryableErrors); |
| 786 | + |
| 787 | + // use previously attempt number to calculate delay |
| 788 | + updateNextTimeToRetryAfterRetryableError(outcome.GetError(), attempt_no - 1); |
| 789 | + |
| 790 | + // update ClickHouse-specific attempt number in the request |
| 791 | + // to help choose the right timeouts on the HTTP client which depends on retry attempt number |
| 792 | + auto clickhouse_request_attempt = getClickhouseAttemptNumber(request_); |
| 793 | + setClickhouseAttemptNumber(request_, clickhouse_request_attempt + attempt_no); |
| 794 | + } |
| 795 | + |
748 | 796 | /// Slowing down due to a previously encountered retryable error, possibly from another thread. |
749 | 797 | slowDownAfterRetryableError(); |
750 | 798 |
|
751 | 799 | try |
752 | 800 | { |
753 | | - /// S3 does retries network errors actually. |
754 | | - /// But it does matter when errors occur. |
755 | | - /// This code retries a specific case when |
756 | | - /// network error happens when XML document is being read from the response body. |
757 | | - /// Hence, the response body is a stream, network errors are possible at reading. |
758 | | - /// S3 doesn't retry them. |
759 | | - |
760 | | - /// Not all requests can be retried in that way. |
761 | | - /// Requests that read out response body to build the result are possible to retry. |
762 | | - /// Requests that expose the response stream as an answer are not retried with that code. E.g. GetObject. |
763 | | - auto outcome = request_fn_(request_); |
764 | | - |
765 | | - if (!outcome.IsSuccess() |
766 | | - /// AWS SDK's built-in per-thread retry logic is disabled. |
767 | | - && client_configuration.s3_slow_all_threads_after_retryable_error |
768 | | - && attempt_no + 1 < max_attempts |
769 | | - /// Retry attempts are managed by the outer loop, so the attemptedRetries argument can be ignored. |
770 | | - && client_configuration.retryStrategy->ShouldRetry(outcome.GetError(), /*attemptedRetries*/ -1)) |
771 | | - { |
772 | | - incrementProfileEvents<IsReadMethod>( |
773 | | - ProfileEvents::S3ReadRequestRetryableErrors, ProfileEvents::S3WriteRequestRetryableErrors); |
774 | | - if (isClientForDisk()) |
775 | | - incrementProfileEvents<IsReadMethod>( |
776 | | - ProfileEvents::DiskS3ReadRequestRetryableErrors, ProfileEvents::DiskS3WriteRequestRetryableErrors); |
777 | | - |
778 | | - updateNextTimeToRetryAfterRetryableError(outcome.GetError(), attempt_no); |
779 | | - continue; |
780 | | - } |
781 | | - return outcome; |
782 | | - } |
783 | | - catch (Poco::Net::NetException &) |
784 | | - { |
785 | | - /// This includes "connection reset", "malformed message", and possibly other exceptions. |
| 801 | + outcome = request_fn_(request_); |
786 | 802 |
|
787 | | - incrementProfileEvents<IsReadMethod>(ProfileEvents::S3ReadRequestsErrors, ProfileEvents::S3WriteRequestsErrors); |
788 | | - if (isClientForDisk()) |
789 | | - incrementProfileEvents<IsReadMethod>(ProfileEvents::DiskS3ReadRequestsErrors, ProfileEvents::DiskS3WriteRequestsErrors); |
790 | | - |
791 | | - tryLogCurrentException(log, "Will retry"); |
792 | | - last_exception = std::current_exception(); |
| 803 | + if (outcome.IsSuccess()) |
| 804 | + break; |
793 | 805 |
|
794 | | - auto error = Aws::Client::AWSError<Aws::Client::CoreErrors>(Aws::Client::CoreErrors::NETWORK_CONNECTION, /*retry*/ true); |
| 806 | + // do not increment S3ReadRequestsErrors/S3WriteRequestsErrors here, it has been accounted in IO/S3/PocoHTTPClient.cpp |
795 | 807 |
|
796 | | - /// Check if query is canceled. |
797 | 808 | /// Retry attempts are managed by the outer loop, so the attemptedRetries argument can be ignored. |
798 | | - if (!client_configuration.retryStrategy->ShouldRetry(error, /*attemptedRetries*/ -1)) |
| 809 | + if (!client_configuration.retryStrategy->ShouldRetry(outcome.GetError(), /*attemptedRetries*/ -1)) |
| 810 | + break; |
| 811 | + } |
| 812 | + catch (Poco::Net::NetException &) |
| 813 | + { |
| 814 | + /// This includes "connection reset", "malformed message", and possibly other exceptions. |
| 815 | + if (!net_exception_handler()) |
| 816 | + break; |
| 817 | + } |
| 818 | + catch (Poco::TimeoutException &) |
| 819 | + { |
| 820 | + /// This includes "Timeout" |
| 821 | + if (!net_exception_handler()) |
799 | 822 | break; |
800 | | - |
801 | | - updateNextTimeToRetryAfterRetryableError(error, attempt_no); |
802 | 823 | } |
803 | 824 | } |
804 | 825 |
|
805 | | - chassert(last_exception); |
806 | | - std::rethrow_exception(last_exception); |
| 826 | + return outcome; |
807 | 827 | }; |
808 | 828 |
|
809 | 829 | return doRequest(request, with_retries); |
@@ -844,7 +864,7 @@ void Client::updateNextTimeToRetryAfterRetryableError(Aws::Client::AWSError<Aws: |
844 | 864 | { |
845 | 865 | if (next_time_to_retry_after_retryable_error.compare_exchange_weak(stored_next_time, next_time_ms)) |
846 | 866 | { |
847 | | - LOG_TRACE(log, "Updated next retry time to {} ms forward after retryable error with code {} ('{}')", sleep_ms, error.GetResponseCode(), error.GetMessage()); |
| 867 | + LOG_TRACE(log, "Updated next retry time to {} ms forward after retryable error with code {}", sleep_ms, error.GetResponseCode()); |
848 | 868 | break; |
849 | 869 | } |
850 | 870 | } |
|
0 commit comments