Skip to content

Commit bca2413

Browse files
davidv1992rnijveld
authored andcommitted
[Monitor] Make more probe errors failures for the remote.
This ensures we don't accidentally blame ourselves for problems the remote creates, even if they show up in unexpected ways.
1 parent 86f169a commit bca2413

File tree

2 files changed

+78
-15
lines changed

2 files changed

+78
-15
lines changed

nts-pool-monitor/src/probe.rs

Lines changed: 75 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -110,12 +110,7 @@ impl Probe {
110110
Ok(Err(NtsError::Error(pool_nts::ErrorCode::NoSuchServer))) => {
111111
return Err(eyre::eyre!("Server not known (yet)"));
112112
}
113-
Ok(Err(NtsError::Error(
114-
pool_nts::ErrorCode::BadRequest | pool_nts::ErrorCode::InternalServerError,
115-
))) => {
116-
return Err(eyre::eyre!("KELB could not succesfully handle our request"));
117-
}
118-
Ok(Err(e @ NtsError::Invalid | e @ NtsError::Error(_))) => {
113+
Ok(Err(e)) => {
119114
let end_time = Instant::now();
120115
return Ok((
121116
KeyExchangeProbeResult {
@@ -125,10 +120,8 @@ impl Probe {
125120
"Time source's response was invalid but well-structured".into()
126121
}
127122
NtsError::Error(e) => e.to_string(),
128-
_ => {
129-
return Err(eyre::eyre!(
130-
"Unexpected branch taken in error description"
131-
));
123+
e => {
124+
format!("Unexpected error during key exchange: {e}")
132125
}
133126
},
134127
exchange_start,
@@ -138,7 +131,6 @@ impl Probe {
138131
None,
139132
));
140133
}
141-
Ok(Err(e)) => return Err(e.into()),
142134
Err(_) => {
143135
let end_time = Instant::now();
144136
return Ok((
@@ -213,12 +205,57 @@ impl Probe {
213205
}
214206
Err(e) => return Err(e),
215207
};
216-
let mut socket = timestamped_socket::socket::connect_address(
208+
let mut socket = match timestamped_socket::socket::connect_address(
217209
addr,
218210
timestamped_socket::socket::GeneralTimestampMode::SoftwareAll,
219-
)?;
211+
) {
212+
Ok(socket) => socket,
213+
Err(e) => {
214+
tracing::debug!("Unexpected error during connect: {e}");
215+
return Ok((
216+
SecuredNtpProbeResult {
217+
status: SecuredNtpProbeStatus::CouldNotConnect,
218+
request_sent: SystemTime::now()
219+
.duration_since(SystemTime::UNIX_EPOCH)
220+
.unwrap_or_default()
221+
.as_secs(),
222+
requested_cookies: 0,
223+
received_cookies: 0,
224+
roundtrip_duration: None,
225+
remote_residence_time: None,
226+
offset: None,
227+
stratum: None,
228+
leap_indicates_synchronized: false,
229+
},
230+
None,
231+
));
232+
}
233+
};
220234

221-
let send = socket.send(msg).await?.ok_or(std::io::ErrorKind::Other)?;
235+
let send = match socket.send(msg).await {
236+
// Timestamp not present errors are purely internal problems, don't blame remote for those.
237+
Ok(timestamp) => timestamp.ok_or(std::io::ErrorKind::Other)?,
238+
Err(e) => {
239+
tracing::debug!("Unexpected error during send: {e}");
240+
return Ok((
241+
SecuredNtpProbeResult {
242+
status: SecuredNtpProbeStatus::CouldNotSend,
243+
request_sent: SystemTime::now()
244+
.duration_since(SystemTime::UNIX_EPOCH)
245+
.unwrap_or_default()
246+
.as_secs(),
247+
requested_cookies: 0,
248+
received_cookies: 0,
249+
roundtrip_duration: None,
250+
remote_residence_time: None,
251+
offset: None,
252+
stratum: None,
253+
leap_indicates_synchronized: false,
254+
},
255+
None,
256+
));
257+
}
258+
};
222259
let t1 = NtpTimestamp::from_net_timestamp(send);
223260

224261
let mut have_deny = false;
@@ -228,7 +265,29 @@ impl Probe {
228265
let (t4, msg) = loop {
229266
let received = select! {
230267
biased;
231-
r = socket.recv(&mut buf) => r?,
268+
r = socket.recv(&mut buf) => match r {
269+
Ok(r) => r,
270+
Err(e) => {
271+
tracing::debug!("Unexpected error during recv: {e}");
272+
return Ok((
273+
SecuredNtpProbeResult {
274+
status: SecuredNtpProbeStatus::CouldNotReceive,
275+
request_sent: SystemTime::now()
276+
.duration_since(SystemTime::UNIX_EPOCH)
277+
.unwrap_or_default()
278+
.as_secs(),
279+
requested_cookies: 0,
280+
received_cookies: 0,
281+
roundtrip_duration: None,
282+
remote_residence_time: None,
283+
offset: None,
284+
stratum: None,
285+
leap_indicates_synchronized: false,
286+
},
287+
None,
288+
));
289+
}
290+
},
232291
_ = &mut timeout => {
233292
return Ok((
234293
SecuredNtpProbeResult {
@@ -302,6 +361,7 @@ impl Probe {
302361

303362
break (
304363
NtpTimestamp::from_net_timestamp(
364+
// Timestamps should be present, problems with that are a bug on our side.
305365
received.timestamp.ok_or(std::io::ErrorKind::Other)?,
306366
),
307367
incoming,

nts-pool-shared/src/monitoring.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,9 @@ pub struct KeyExchangeProbeResult {
6060
pub enum SecuredNtpProbeStatus {
6161
Success,
6262
DnsLookupFailed,
63+
CouldNotConnect,
64+
CouldNotSend,
65+
CouldNotReceive,
6366
NtsNak,
6467
Deny,
6568
Timeout,

0 commit comments

Comments
 (0)