|
| 1 | +// Copyright 2021 Datafuse Labs |
| 2 | +// |
| 3 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +// you may not use this file except in compliance with the License. |
| 5 | +// You may obtain a copy of the License at |
| 6 | +// |
| 7 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +// |
| 9 | +// Unless required by applicable law or agreed to in writing, software |
| 10 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +// See the License for the specific language governing permissions and |
| 13 | +// limitations under the License. |
| 14 | + |
| 15 | +use std::cmp::min; |
| 16 | + |
| 17 | +use databend_common_exception::ErrorCode; |
| 18 | + |
/// Counters tracking persistent and temporary errors seen during history table
/// operations.
///
/// Both counters start at zero, are only ever incremented by one, and saturate
/// at `u32::MAX` instead of overflowing.
#[derive(Debug, Default)]
pub struct ErrorCounters {
    /// Number of persistent errors recorded since creation or the last reset.
    persistent: u32,
    /// Number of temporary errors recorded since creation or the last reset.
    temporary: u32,
}

impl ErrorCounters {
    /// Creates a new `ErrorCounters` with both counts set to zero.
    pub fn new() -> Self {
        Self::default()
    }

    /// Resets both error counters to zero.
    pub fn reset(&mut self) {
        self.persistent = 0;
        self.temporary = 0;
    }

    /// Increments the persistent error counter and returns the new count.
    ///
    /// The counter saturates at `u32::MAX`: a plain `+= 1` would panic in
    /// overflow-checked builds and wrap to zero otherwise, which would make
    /// `persistent_exceeded_limit` report `false` again.
    pub fn increment_persistent(&mut self) -> u32 {
        self.persistent = self.persistent.saturating_add(1);
        self.persistent
    }

    /// Increments the temporary error counter and returns the new count.
    ///
    /// The counter saturates at `u32::MAX`: a plain `+= 1` would panic in
    /// overflow-checked builds and wrap to zero otherwise, which would drop
    /// the backoff back to its minimum.
    pub fn increment_temporary(&mut self) -> u32 {
        self.temporary = self.temporary.saturating_add(1);
        self.temporary
    }

    /// Returns `true` once the persistent error count is strictly greater than
    /// `MAX_PERSISTENT_ERROR_ATTEMPTS`.
    pub fn persistent_exceeded_limit(&self) -> bool {
        self.persistent > MAX_PERSISTENT_ERROR_ATTEMPTS
    }

    /// Calculates the backoff duration in seconds for temporary errors.
    ///
    /// The backoff is exponential: `2^temporary` seconds, capped at
    /// `MAX_TEMP_ERROR_BACKOFF_SECONDS`. With no temporary errors recorded the
    /// result is 1 second.
    pub fn calculate_temp_backoff(&self) -> u64 {
        // `saturating_pow` keeps large exponents from overflowing before the cap is applied.
        let exponential = 2u64.saturating_pow(self.temporary);
        min(exponential, MAX_TEMP_ERROR_BACKOFF_SECONDS)
    }

    /// Returns the current persistent error count.
    pub fn persistent_count(&self) -> u32 {
        self.persistent
    }

    /// Returns the current temporary error count.
    pub fn temporary_count(&self) -> u32 {
        self.temporary
    }
}

/// Maximum number of persistent error attempts before giving up
const MAX_PERSISTENT_ERROR_ATTEMPTS: u32 = 3;

/// Maximum backoff time in seconds for temporary errors (10 minutes)
const MAX_TEMP_ERROR_BACKOFF_SECONDS: u64 = 10 * 60;
| 79 | + |
| 80 | +/// Check if the error is a temporary error that should be retried |
| 81 | +/// We will use this to determine if we should retry the operation. |
| 82 | +pub fn is_temp_error(e: &ErrorCode) -> bool { |
| 83 | + let code = e.code(); |
| 84 | + |
| 85 | + // Storage and I/O errors are considered temporary errors |
| 86 | + let storage = code == ErrorCode::STORAGE_NOT_FOUND |
| 87 | + || code == ErrorCode::STORAGE_PERMISSION_DENIED |
| 88 | + || code == ErrorCode::STORAGE_UNAVAILABLE |
| 89 | + || code == ErrorCode::STORAGE_UNSUPPORTED |
| 90 | + || code == ErrorCode::STORAGE_INSECURE |
| 91 | + || code == ErrorCode::INVALID_OPERATION |
| 92 | + || code == ErrorCode::STORAGE_OTHER; |
| 93 | + |
| 94 | + // If acquire semaphore failed, we consider it a temporary error |
| 95 | + let meta = code == ErrorCode::META_SERVICE_ERROR; |
| 96 | + let transaction = code == ErrorCode::UNRESOLVABLE_CONFLICT; |
| 97 | + |
| 98 | + storage || transaction || meta |
| 99 | +} |
0 commit comments