Skip to content

Commit 74b8baa

Browse files
authored
fix(iroh-dns-server): Backwards compatibility for packets stored with iroh-dns-server v0.34 or lower (#3295)
## Description #3186 introduced a backwards-incompatible change to the iroh-dns-server database that went uncaught so far. The serialization format for SignedPackets changed between pkarr v2 and pkarr v3: pkarr v3 prepends a `last_seen` timestamp (8 bytes) to the serialized representation of a SignedPacket. Therefore, parsing packets stored in the the database with pkarr v2 (iroh-dns-server v0.34 and lower) fails with an error. The error also stops the store actor, and thus all subsequent requests will fail as well. This PR changes this by attempting to parse the packet in the pkarr v2 format if parsing as pkarr v3 packet fails. Parsing as pkarr v2 is achieved by prepending 8 zero bytes to the payload (an empty last_seen timestamp). The PR also improves the tracing logging at various places. ## Breaking Changes <!-- Optional, if there are any breaking changes document them, including how to migrate older code. --> ## Notes & open questions There's no test yet. I tested manually by launching iroh-dns-server v0.34, inserting some packets with the `publish` example. When restarting the server from `main`, retrieving packets leads to errors. When applying this PR, retrieving the old packets work. If we want / need a test, how should we do it? We can either add a dev dependency to `[email protected]` (which would recursively pull in all of `[email protected]`), or somehow insert a packet as it would be created by `[email protected]` manually into the database. Not sure if it's worth it, but maybe it is? ## Change checklist <!-- Remove any that are not relevant. --> - [x] Self-review. - [x] Documentation updates following the [style guide](https://rust-lang.github.io/rfcs/1574-more-api-documentation-conventions.html#appendix-a-full-conventions-text), if relevant. - [ ] Tests if relevant.
1 parent 30577d3 commit 74b8baa

File tree

3 files changed

+71
-21
lines changed

3 files changed

+71
-21
lines changed

iroh-dns-server/src/store.rs

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use hickory_server::proto::rr::{Name, RecordSet, RecordType, RrKey};
77
use lru::LruCache;
88
use pkarr::{Client as PkarrClient, SignedPacket};
99
use tokio::sync::Mutex;
10-
use tracing::{debug, trace};
10+
use tracing::{debug, trace, warn};
1111
use ttl_cache::TtlCache;
1212

1313
use self::signed_packets::SignedPacketStore;
@@ -92,24 +92,46 @@ impl ZoneStore {
9292
}
9393

9494
/// Resolve a DNS query.
95-
#[allow(clippy::unused_async)]
95+
#[tracing::instrument("resolve", skip_all, fields(pubkey=%pubkey,name=%name,typ=%record_type))]
9696
pub async fn resolve(
9797
&self,
9898
pubkey: &PublicKeyBytes,
9999
name: &Name,
100100
record_type: RecordType,
101101
) -> Result<Option<Arc<RecordSet>>> {
102-
tracing::info!("{} {}", name, record_type);
102+
trace!("store resolve");
103103
if let Some(rset) = self.cache.lock().await.resolve(pubkey, name, record_type) {
104+
debug!(
105+
len = rset.records_without_rrsigs().count(),
106+
"resolved from cache"
107+
);
104108
return Ok(Some(rset));
105109
}
106110

107111
if let Some(packet) = self.store.get(pubkey).await? {
108-
return self
112+
trace!(packet_timestamp = ?packet.timestamp(), "store hit");
113+
return match self
109114
.cache
110115
.lock()
111116
.await
112-
.insert_and_resolve(&packet, name, record_type);
117+
.insert_and_resolve(&packet, name, record_type)
118+
{
119+
Ok(Some(rset)) => {
120+
debug!(
121+
len = rset.records_without_rrsigs().count(),
122+
"resolved from store"
123+
);
124+
Ok(Some(rset))
125+
}
126+
Ok(None) => {
127+
debug!("resolved to zone, but no matching records in zone");
128+
Ok(None)
129+
}
130+
Err(err) => {
131+
warn!("failed to retrieve zone after inserting in cache: {err:#?}");
132+
Err(err)
133+
}
134+
};
113135
};
114136

115137
if let Some(pkarr) = self.pkarr.as_ref() {
@@ -226,11 +248,14 @@ impl ZoneCache {
226248
.map(|old| old.is_newer_than(signed_packet))
227249
.unwrap_or(false)
228250
{
229-
return Ok(());
251+
trace!("insert skip: cached is newer");
252+
Ok(())
253+
} else {
254+
self.cache
255+
.put(pubkey, CachedZone::from_signed_packet(signed_packet)?);
256+
trace!("inserted into cache");
257+
Ok(())
230258
}
231-
self.cache
232-
.put(pubkey, CachedZone::from_signed_packet(signed_packet)?);
233-
Ok(())
234259
}
235260

236261
fn remove(&mut self, pubkey: &PublicKeyBytes) {
@@ -260,10 +285,8 @@ impl CachedZone {
260285
}
261286

262287
fn resolve(&self, name: &Name, record_type: RecordType) -> Option<Arc<RecordSet>> {
288+
trace!(name=%name, typ=%record_type, "resolve in zone");
263289
let key = RrKey::new(name.into(), record_type);
264-
for record in self.records.keys() {
265-
tracing::info!("record {:?}", record);
266-
}
267290
self.records.get(&key).cloned()
268291
}
269292
}

iroh-dns-server/src/store/signed_packets.rs

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use redb::{
77
};
88
use tokio::sync::{mpsc, oneshot};
99
use tokio_util::sync::CancellationToken;
10-
use tracing::{debug, error, info, trace};
10+
use tracing::{debug, error, info, trace, warn};
1111

1212
use crate::{metrics::Metrics, util::PublicKeyBytes};
1313

@@ -132,9 +132,16 @@ impl Actor {
132132
Some(msg) = self.recv.recv() => {
133133
match msg {
134134
Message::Get { key, res } => {
135-
trace!("get {}", key);
136-
let packet = get_packet(&tables.signed_packets, &key).context("get packet failed")?;
137-
res.send(packet).ok();
135+
match get_packet(&tables.signed_packets, &key) {
136+
Ok(packet) => {
137+
trace!("get {key}: {}", packet.is_some());
138+
res.send(packet).ok();
139+
},
140+
Err(err) => {
141+
warn!("get {key} failed: {err:#}");
142+
return Err(err).with_context(|| format!("get packet for {key} failed"))
143+
}
144+
}
138145
}
139146
Message::Upsert { packet, res } => {
140147
let key = PublicKeyBytes::from_signed_packet(&packet);
@@ -317,8 +324,23 @@ fn get_packet(
317324
let Some(row) = table.get(key.as_ref()).context("database fetch failed")? else {
318325
return Ok(None);
319326
};
320-
let packet = SignedPacket::deserialize(row.value()).context("parsing signed packet failed")?;
321-
Ok(Some(packet))
327+
match SignedPacket::deserialize(row.value()) {
328+
Ok(packet) => Ok(Some(packet)),
329+
Err(err) => {
330+
// Prior to iroh-dns-server v0.35, we stored packets in the default `SignedPacket::as_bytes` serialization from pkarr v2,
331+
// which did not include the `last_seen` timestamp added as a prefix in `SignedPacket::serialize` from pkarr v3.
332+
// If decoding the packet as a serialized pkarr v3 packet fails, we assume it was stored with iroh-dns-server before v0.35,
333+
// and prepend an empty timestamp.
334+
let data = row.value();
335+
let mut buf = Vec::with_capacity(data.len() + 8);
336+
buf.extend(&[0u8; 8]);
337+
buf.extend(data);
338+
match SignedPacket::deserialize(&buf) {
339+
Ok(packet) => Ok(Some(packet)),
340+
Err(err2) => Err(anyhow::anyhow!("Failed to decode as pkarr v3: {err:#}. Also failed to decode as pkarr v2: {err2:#}"))
341+
}
342+
}
343+
}
322344
}
323345

324346
async fn evict_task(send: mpsc::Sender<Message>, options: Options, cancel: CancellationToken) {

iroh/src/discovery/pkarr.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -392,10 +392,15 @@ impl PkarrRelayClient {
392392
.send()
393393
.await?;
394394

395-
if !response.status().is_success() {
395+
let status = response.status();
396+
if !status.is_success() {
397+
let text = if let Ok(text) = response.text().await {
398+
text
399+
} else {
400+
"(no details provided)".to_string()
401+
};
396402
bail!(format!(
397-
"Publish request failed with status {}",
398-
response.status()
403+
"Publish request failed with status {status}: {text}",
399404
))
400405
}
401406

0 commit comments

Comments
 (0)