Skip to content

Commit af8adba

Browse files
muzarskiLorak-mmk
authored andcommitted
retry_policy: don't use wildcard '_' in QueryError match
Since last time, during error refactor I introduced a silent bug to the code (#1075), I'd like to prevent that from happening in the future. This is why we replace a `_` match with explicit error variants in retry policy modules. We also enable `wildcard_enum_match_arm` clippy lint in this place for QueryError and DbError matches.
1 parent 9522c72 commit af8adba

File tree

2 files changed

+200
-125
lines changed

2 files changed

+200
-125
lines changed

scylla/src/policies/retry/default.rs

Lines changed: 95 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -59,77 +59,113 @@ impl RetrySession for DefaultRetrySession {
5959
if request_info.consistency.is_serial() {
6060
return RetryDecision::DontRetry;
6161
};
62+
// Do not remove this lint!
63+
// It's there for a reason - we don't want new variants
64+
// automatically fall under `_` pattern when they are introduced.
65+
#[deny(clippy::wildcard_enum_match_arm)]
6266
match request_info.error {
63-
// Basic errors - there are some problems on this node
64-
// Retry on a different one if possible
65-
RequestAttemptError::BrokenConnectionError(_)
66-
| RequestAttemptError::DbError(DbError::Overloaded, _)
67-
| RequestAttemptError::DbError(DbError::ServerError, _)
68-
| RequestAttemptError::DbError(DbError::TruncateError, _) => {
67+
// With connection broken, we don't know if request was executed.
68+
RequestAttemptError::BrokenConnectionError(_) => {
6969
if request_info.is_idempotent {
7070
RetryDecision::RetryNextTarget(None)
7171
} else {
7272
RetryDecision::DontRetry
7373
}
7474
}
75-
// Unavailable - the current node believes that not enough nodes
76-
// are alive to satisfy specified consistency requirements.
77-
// Maybe this node has network problems - try a different one.
78-
// Perform at most one retry - it's unlikely that two nodes
79-
// have network problems at the same time
80-
RequestAttemptError::DbError(DbError::Unavailable { .. }, _) => {
81-
if !self.was_unavailable_retry {
82-
self.was_unavailable_retry = true;
83-
RetryDecision::RetryNextTarget(None)
84-
} else {
85-
RetryDecision::DontRetry
86-
}
87-
}
88-
// ReadTimeout - coordinator didn't receive enough replies in time.
89-
// Retry at most once and only if there were actually enough replies
90-
// to satisfy consistency but they were all just checksums (data_present == false).
91-
// This happens when the coordinator picked replicas that were overloaded/dying.
92-
// Retried request should have some useful response because the node will detect
93-
// that these replicas are dead.
94-
RequestAttemptError::DbError(
95-
DbError::ReadTimeout {
96-
received,
97-
required,
98-
data_present,
99-
..
100-
},
101-
_,
102-
) => {
103-
if !self.was_read_timeout_retry && received >= required && !*data_present {
104-
self.was_read_timeout_retry = true;
105-
RetryDecision::RetrySameTarget(None)
106-
} else {
107-
RetryDecision::DontRetry
108-
}
109-
}
110-
// Write timeout - coordinator didn't receive enough replies in time.
111-
// Retry at most once and only for BatchLog write.
112-
// Coordinator probably didn't detect the nodes as dead.
113-
// By the time we retry they should be detected as dead.
114-
RequestAttemptError::DbError(DbError::WriteTimeout { write_type, .. }, _) => {
115-
if !self.was_write_timeout_retry
116-
&& request_info.is_idempotent
117-
&& *write_type == WriteType::BatchLog
118-
{
119-
self.was_write_timeout_retry = true;
120-
RetryDecision::RetrySameTarget(None)
121-
} else {
122-
RetryDecision::DontRetry
75+
// DbErrors
76+
RequestAttemptError::DbError(db_error, _) => {
77+
// Do not remove this lint!
78+
// It's there for a reason - we don't want new variants
79+
// automatically fall under `_` pattern when they are introduced.
80+
#[deny(clippy::wildcard_enum_match_arm)]
81+
match db_error {
82+
// Basic errors - there are some problems on this node
83+
// Retry on a different one if possible
84+
DbError::Overloaded | DbError::ServerError | DbError::TruncateError => {
85+
if request_info.is_idempotent {
86+
RetryDecision::RetryNextTarget(None)
87+
} else {
88+
RetryDecision::DontRetry
89+
}
90+
}
91+
// Unavailable - the current node believes that not enough nodes
92+
// are alive to satisfy specified consistency requirements.
93+
// Maybe this node has network problems - try a different one.
94+
// Perform at most one retry - it's unlikely that two nodes
95+
// have network problems at the same time
96+
DbError::Unavailable { .. } => {
97+
if !self.was_unavailable_retry {
98+
self.was_unavailable_retry = true;
99+
RetryDecision::RetryNextTarget(None)
100+
} else {
101+
RetryDecision::DontRetry
102+
}
103+
}
104+
// ReadTimeout - coordinator didn't receive enough replies in time.
105+
// Retry at most once and only if there were actually enough replies
106+
// to satisfy consistency but they were all just checksums (data_present == false).
107+
// This happens when the coordinator picked replicas that were overloaded/dying.
108+
// Retried request should have some useful response because the node will detect
109+
// that these replicas are dead.
110+
DbError::ReadTimeout {
111+
received,
112+
required,
113+
data_present,
114+
..
115+
} => {
116+
if !self.was_read_timeout_retry && received >= required && !*data_present {
117+
self.was_read_timeout_retry = true;
118+
RetryDecision::RetrySameTarget(None)
119+
} else {
120+
RetryDecision::DontRetry
121+
}
122+
}
123+
// Write timeout - coordinator didn't receive enough replies in time.
124+
// Retry at most once and only for BatchLog write.
125+
// Coordinator probably didn't detect the nodes as dead.
126+
// By the time we retry they should be detected as dead.
127+
DbError::WriteTimeout { write_type, .. } => {
128+
if !self.was_write_timeout_retry
129+
&& request_info.is_idempotent
130+
&& *write_type == WriteType::BatchLog
131+
{
132+
self.was_write_timeout_retry = true;
133+
RetryDecision::RetrySameTarget(None)
134+
} else {
135+
RetryDecision::DontRetry
136+
}
137+
}
138+
// The node is still bootstrapping it can't execute the request, we should try another one
139+
DbError::IsBootstrapping => RetryDecision::RetryNextTarget(None),
140+
// In all other cases propagate the error to the user
141+
DbError::SyntaxError
142+
| DbError::Invalid
143+
| DbError::AlreadyExists { .. }
144+
| DbError::FunctionFailure { .. }
145+
| DbError::AuthenticationError
146+
| DbError::Unauthorized
147+
| DbError::ConfigError
148+
| DbError::ReadFailure { .. }
149+
| DbError::WriteFailure { .. }
150+
| DbError::Unprepared { .. }
151+
| DbError::ProtocolError
152+
| DbError::RateLimitReached { .. }
153+
| DbError::Other(_)
154+
| _ => RetryDecision::DontRetry,
123155
}
124156
}
125-
// The node is still bootstrapping it can't execute the request, we should try another one
126-
RequestAttemptError::DbError(DbError::IsBootstrapping, _) => {
127-
RetryDecision::RetryNextTarget(None)
128-
}
129157
// Connection to the contacted node is overloaded, try another one
130158
RequestAttemptError::UnableToAllocStreamId => RetryDecision::RetryNextTarget(None),
131159
// In all other cases propagate the error to the user
132-
_ => RetryDecision::DontRetry,
160+
RequestAttemptError::BodyExtensionsParseError(_)
161+
| RequestAttemptError::CqlErrorParseError(_)
162+
| RequestAttemptError::CqlRequestSerialization(_)
163+
| RequestAttemptError::CqlResultParseError(_)
164+
| RequestAttemptError::NonfinishedPagingState
165+
| RequestAttemptError::RepreparedIdChanged { .. }
166+
| RequestAttemptError::RepreparedIdMissingInBatch
167+
| RequestAttemptError::SerializationError(_)
168+
| RequestAttemptError::UnexpectedResponse(_) => RetryDecision::DontRetry,
133169
}
134170
}
135171

scylla/src/policies/retry/downgrading_consistency.rs

Lines changed: 105 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -89,88 +89,127 @@ impl RetrySession for DowngradingConsistencyRetrySession {
8989
decision
9090
}
9191

92+
// Do not remove this lint!
93+
// It's there for a reason - we don't want new variants
94+
// automatically fall under `_` pattern when they are introduced.
95+
#[deny(clippy::wildcard_enum_match_arm)]
9296
match request_info.error {
93-
// Basic errors - there are some problems on this node
94-
// Retry on a different one if possible
95-
RequestAttemptError::BrokenConnectionError(_)
96-
| RequestAttemptError::DbError(DbError::Overloaded, _)
97-
| RequestAttemptError::DbError(DbError::ServerError, _)
98-
| RequestAttemptError::DbError(DbError::TruncateError, _) => {
97+
// With connection broken, we don't know if request was executed.
98+
RequestAttemptError::BrokenConnectionError(_) => {
9999
if request_info.is_idempotent {
100100
RetryDecision::RetryNextTarget(None)
101101
} else {
102102
RetryDecision::DontRetry
103103
}
104104
}
105-
// Unavailable - the current node believes that not enough nodes
106-
// are alive to satisfy specified consistency requirements.
107-
RequestAttemptError::DbError(DbError::Unavailable { alive, .. }, _) => {
108-
if !self.was_retry {
109-
self.was_retry = true;
110-
max_likely_to_work_cl(*alive, cl)
111-
} else {
112-
RetryDecision::DontRetry
113-
}
114-
}
115-
// ReadTimeout - coordinator didn't receive enough replies in time.
116-
RequestAttemptError::DbError(
117-
DbError::ReadTimeout {
118-
received,
119-
required,
120-
data_present,
121-
..
122-
},
123-
_,
124-
) => {
125-
if self.was_retry {
126-
RetryDecision::DontRetry
127-
} else if received < required {
128-
self.was_retry = true;
129-
max_likely_to_work_cl(*received, cl)
130-
} else if !*data_present {
131-
self.was_retry = true;
132-
RetryDecision::RetrySameTarget(None)
133-
} else {
134-
RetryDecision::DontRetry
135-
}
136-
}
137-
// Write timeout - coordinator didn't receive enough replies in time.
138-
RequestAttemptError::DbError(
139-
DbError::WriteTimeout {
140-
write_type,
141-
received,
142-
..
143-
},
144-
_,
145-
) => {
146-
if self.was_retry || !request_info.is_idempotent {
147-
RetryDecision::DontRetry
148-
} else {
149-
self.was_retry = true;
150-
match write_type {
151-
WriteType::Batch | WriteType::Simple if *received > 0 => {
152-
RetryDecision::IgnoreWriteError
105+
// DbErrors
106+
RequestAttemptError::DbError(db_error, _) => {
107+
// Do not remove this lint!
108+
// It's there for a reason - we don't want new variants
109+
// automatically fall under `_` pattern when they are introduced.
110+
#[deny(clippy::wildcard_enum_match_arm)]
111+
match db_error {
112+
// Basic errors - there are some problems on this node
113+
// Retry on a different one if possible
114+
DbError::Overloaded | DbError::ServerError | DbError::TruncateError => {
115+
if request_info.is_idempotent {
116+
RetryDecision::RetryNextTarget(None)
117+
} else {
118+
RetryDecision::DontRetry
153119
}
154-
155-
WriteType::UnloggedBatch => {
156-
// Since only part of the batch could have been persisted,
157-
// retry with whatever consistency should allow to persist all
120+
}
121+
// Unavailable - the current node believes that not enough nodes
122+
// are alive to satisfy specified consistency requirements.
123+
DbError::Unavailable { alive, .. } => {
124+
if !self.was_retry {
125+
self.was_retry = true;
126+
max_likely_to_work_cl(*alive, cl)
127+
} else {
128+
RetryDecision::DontRetry
129+
}
130+
}
131+
// ReadTimeout - coordinator didn't receive enough replies in time.
132+
DbError::ReadTimeout {
133+
received,
134+
required,
135+
data_present,
136+
..
137+
} => {
138+
if self.was_retry {
139+
RetryDecision::DontRetry
140+
} else if received < required {
141+
self.was_retry = true;
158142
max_likely_to_work_cl(*received, cl)
143+
} else if !*data_present {
144+
self.was_retry = true;
145+
RetryDecision::RetrySameTarget(None)
146+
} else {
147+
RetryDecision::DontRetry
148+
}
149+
}
150+
// Write timeout - coordinator didn't receive enough replies in time.
151+
DbError::WriteTimeout {
152+
write_type,
153+
received,
154+
..
155+
} => {
156+
if self.was_retry || !request_info.is_idempotent {
157+
RetryDecision::DontRetry
158+
} else {
159+
self.was_retry = true;
160+
match write_type {
161+
WriteType::Batch | WriteType::Simple if *received > 0 => {
162+
RetryDecision::IgnoreWriteError
163+
}
164+
165+
WriteType::UnloggedBatch => {
166+
// Since only part of the batch could have been persisted,
167+
// retry with whatever consistency should allow to persist all
168+
max_likely_to_work_cl(*received, cl)
169+
}
170+
WriteType::BatchLog => RetryDecision::RetrySameTarget(None),
171+
172+
WriteType::Counter
173+
| WriteType::Cas
174+
| WriteType::View
175+
| WriteType::Cdc
176+
| WriteType::Simple
177+
| WriteType::Batch
178+
| WriteType::Other(_) => RetryDecision::DontRetry,
179+
}
159180
}
160-
WriteType::BatchLog => RetryDecision::RetrySameTarget(None),
161-
162-
_ => RetryDecision::DontRetry,
163181
}
182+
// The node is still bootstrapping it can't execute the request, we should try another one
183+
DbError::IsBootstrapping => RetryDecision::RetryNextTarget(None),
184+
// In all other cases propagate the error to the user
185+
DbError::SyntaxError
186+
| DbError::Invalid
187+
| DbError::AlreadyExists { .. }
188+
| DbError::FunctionFailure { .. }
189+
| DbError::AuthenticationError
190+
| DbError::Unauthorized
191+
| DbError::ConfigError
192+
| DbError::ReadFailure { .. }
193+
| DbError::WriteFailure { .. }
194+
| DbError::Unprepared { .. }
195+
| DbError::ProtocolError
196+
| DbError::RateLimitReached { .. }
197+
| DbError::Other(_)
198+
| _ => RetryDecision::DontRetry,
164199
}
165200
}
166-
// The node is still bootstrapping it can't execute the request, we should try another one
167-
RequestAttemptError::DbError(DbError::IsBootstrapping, _) => {
168-
RetryDecision::RetryNextTarget(None)
169-
}
170201
// Connection to the contacted node is overloaded, try another one
171202
RequestAttemptError::UnableToAllocStreamId => RetryDecision::RetryNextTarget(None),
172203
// In all other cases propagate the error to the user
173-
_ => RetryDecision::DontRetry,
204+
RequestAttemptError::BodyExtensionsParseError(_)
205+
| RequestAttemptError::CqlErrorParseError(_)
206+
| RequestAttemptError::CqlRequestSerialization(_)
207+
| RequestAttemptError::CqlResultParseError(_)
208+
| RequestAttemptError::NonfinishedPagingState
209+
| RequestAttemptError::RepreparedIdChanged { .. }
210+
| RequestAttemptError::RepreparedIdMissingInBatch
211+
| RequestAttemptError::SerializationError(_)
212+
| RequestAttemptError::UnexpectedResponse(_) => RetryDecision::DontRetry,
174213
}
175214
}
176215

0 commit comments

Comments
 (0)