Skip to content

Commit d674a6d

Browse files
committed
chore: refactor the dump macro, and allow appending zstd suffixes
1 parent a1499eb commit d674a6d

File tree

4 files changed

+124
-50
lines changed

4 files changed

+124
-50
lines changed

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

migration/tests/data/m0002010.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,18 @@ async fn examples(
4444
}
4545

4646
// A dump created before merging the SBOM CVSS enhancements
47-
dump!(Ds4(
48-
"https://trustify-dumps-ds4.s3.eu-west-1.amazonaws.com/20251104T095645Z",
49-
storage = "dump.tar.gz",
50-
no_digests,
51-
));
47+
dump!(
48+
Ds4("https://trustify-dumps-ds4.s3.eu-west-1.amazonaws.com/20251104T095645Z")
49+
.storage_file("dump.tar.gz")
50+
.no_digests()
51+
.strip(5)
52+
.fix_zstd()
53+
);
5254

5355
/// Test the performance of applying the data migration of `m0002010`.
56+
///
57+
/// **NOTE:** If this test runs out of disk space, you can set `TMPDIR` to a directory with more
58+
/// space.
5459
#[test_context(TrustifyMigrationContext<Ds4>)]
5560
#[test(tokio::test)]
5661
async fn performance(ctx: &TrustifyMigrationContext<Ds4>) -> Result<(), anyhow::Error> {

modules/storage/src/service/fs.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,11 @@ impl FileSystemBackend {
111111
for compression in &self.read_compressions {
112112
target.set_extension(compression.extension());
113113
if try_exists(&target).await? {
114+
log::debug!("Located: {}", target.display());
114115
return Ok(Some((target, *compression)));
115116
}
116117
}
118+
log::info!("Missing - base: {target:?}");
117119
Ok(None)
118120
}
119121
}

test-context/src/ctx/migration.rs

Lines changed: 111 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@ use crate::{
44
migration::{Dump, Dumps},
55
};
66
use anyhow::Context;
7-
use std::{borrow::Cow, marker::PhantomData, ops::Deref};
7+
use std::{borrow::Cow, fs::OpenOptions, io::Write, marker::PhantomData, ops::Deref, path::PathBuf};
8+
use walkdir::WalkDir;
89
use tar::Archive;
910
use test_context::AsyncTestContext;
1011
use trustify_common::decompress::decompress_read;
@@ -26,52 +27,76 @@ macro_rules! commit {
2627

2728
/// Declare a unit struct implementing [`crate::ctx::DumpId`] for a remote dump.
///
/// The first token is the name of the struct to generate, followed by the dump's base
/// URL in parentheses. Any further tokens are forwarded verbatim as a builder chain on
/// [`crate::ctx::DumpSource`]:
///
/// ```ignore
/// dump!(
///     Ds4("https://example.com/20251104T095645Z")
///         .storage_file("dump.tar.gz")
///         .no_digests()
/// );
/// ```
#[macro_export]
macro_rules! dump {
    ($t:ident($url:literal) $($chain:tt)*) => {
        pub struct $t;

        impl $crate::ctx::DumpId for $t {
            fn dump_id() -> $crate::ctx::Source {
                // Forward the trailing tokens as a method chain on the builder, so the
                // macro doesn't need to know about individual settings.
                $crate::ctx::Source::Dump(
                    $crate::ctx::DumpSource::new($url) $($chain)*
                )
            }
        }
    };
}
6242

43+
/// Settings describing a remote dump, consumed by the `dump!` macro.
///
/// Created via [`DumpSource::new`], then refined using the builder-style methods.
pub struct DumpSource {
    /// base URL to the dump files
    pub base_url: &'static str,
    /// DB file name
    pub db_file: &'static str,
    /// storage archive
    pub storage_file: &'static str,
    /// if there are digests for the files
    pub digests: bool,
    /// number of leading path components to strip when unpacking the storage archive
    pub strip: usize,
    /// if the zstd EOF marker should be appended to unpacked `.zstd` files
    pub fix_zstd: bool,
}

impl DumpSource {
    /// Create a new source for `base_url` with the defaults: `dump.sql.gz` DB file,
    /// `dump.tar` storage archive, digests enabled, no stripping, no zstd fixing.
    pub fn new(base_url: &'static str) -> Self {
        Self {
            base_url,
            db_file: "dump.sql.gz",
            storage_file: "dump.tar",
            digests: true,
            strip: 0,
            fix_zstd: false,
        }
    }

    /// Override the DB file name.
    #[must_use]
    pub fn db_file(mut self, v: &'static str) -> Self {
        self.db_file = v;
        self
    }

    /// Override the storage archive file name.
    #[must_use]
    pub fn storage_file(mut self, v: &'static str) -> Self {
        self.storage_file = v;
        self
    }

    /// Set whether digest files are expected alongside the dump files.
    #[must_use]
    pub fn digests(mut self, v: bool) -> Self {
        self.digests = v;
        self
    }

    /// Convenience shorthand for `.digests(false)`.
    #[must_use]
    pub fn no_digests(self) -> Self {
        self.digests(false)
    }

    /// Strip the first `v` leading path components when unpacking the storage archive.
    #[must_use]
    pub fn strip(mut self, v: usize) -> Self {
        self.strip = v;
        self
    }

    /// Appends the zstd EOF marker (`[0x01, 0x00, 0x00]`) to all `.zstd` files in the storage
    /// directory after unpacking. Older dump generation did not properly close the zstd stream,
    /// leaving the EOF marker unwritten.
    #[must_use]
    pub fn fix_zstd(mut self) -> Self {
        self.fix_zstd = true;
        self
    }
}
96+
6397
pub enum Source {
6498
Migration(Option<&'static str>),
65-
Dump {
66-
/// base URL to the dump files
67-
base_url: &'static str,
68-
/// DB file name
69-
db_file: &'static str,
70-
/// storage archive
71-
storage_file: &'static str,
72-
/// if there are digests for the files
73-
digests: bool,
74-
},
99+
Dump(DumpSource),
75100
}
76101

77102
pub trait DumpId {
@@ -100,23 +125,25 @@ impl<ID: DumpId> Deref for TrustifyMigrationContext<ID> {
100125

101126
impl<ID: DumpId> TrustifyMigrationContext<ID> {
102127
pub async fn new() -> anyhow::Result<Self> {
103-
let (base, db_file, storage_file) = match ID::dump_id() {
128+
let (base, db_file, storage_file, strip, fix_zstd) = match ID::dump_id() {
104129
Source::Migration(migration) => {
105130
let id: Cow<'static, str> = match migration {
106131
Some(id) => format!("commit-{id}").into(),
107132
None => "latest".into(),
108133
};
109134
let migration = Migration::new().context("failed to create migration manager")?;
110135
let base = migration.provide(&id).await?;
111-
(base, "dump.sql.xz", "dump.tar")
136+
(base, "dump.sql.xz", "dump.tar", 0usize, false)
112137
}
113138

114-
Source::Dump {
139+
Source::Dump(DumpSource {
115140
base_url,
116141
db_file,
117142
storage_file,
118143
digests,
119-
} => {
144+
strip,
145+
fix_zstd,
146+
}) => {
120147
let base = Dumps::new()?
121148
.provide(Dump {
122149
url: base_url,
@@ -125,7 +152,7 @@ impl<ID: DumpId> TrustifyMigrationContext<ID> {
125152
})
126153
.await?;
127154

128-
(base, db_file, storage_file)
155+
(base, db_file, storage_file, strip, fix_zstd)
129156
}
130157
};
131158

@@ -141,12 +168,52 @@ impl<ID: DumpId> TrustifyMigrationContext<ID> {
141168
let source = decompress_read(storage_file).context("failed to open storage dump")?;
142169

143170
let mut archive = Archive::new(source);
144-
archive
145-
.unpack(tmp.path())
146-
.context("failed to unpack storage dump")?;
171+
if strip == 0 {
172+
archive
173+
.unpack(tmp.path())
174+
.context("failed to unpack storage dump")?;
175+
} else {
176+
for entry in archive
177+
.entries()
178+
.context("failed to read storage archive entries")?
179+
{
180+
let mut entry = entry.context("failed to read storage archive entry")?;
181+
let path = entry
182+
.path()
183+
.context("failed to get entry path")?
184+
.into_owned();
185+
let stripped: PathBuf = path.components().skip(strip).collect();
186+
if stripped.as_os_str().is_empty() {
187+
continue;
188+
}
189+
// NOTE: `unpack` (vs `unpack_in`) has no path traversal protection, but
190+
// this is test-only code and the archive content is generated by us and trusted.
191+
entry
192+
.unpack(tmp.path().join(stripped))
193+
.context("failed to unpack storage archive entry")?;
194+
}
195+
}
147196

148197
log::info!("Storage unpacked");
149198

199+
if fix_zstd {
200+
const ZSTD_EOF_BYTES: [u8; 3] = [0x01, 0x00, 0x00];
201+
for entry in WalkDir::new(tmp.path()) {
202+
let entry = entry.context("failed to walk storage directory")?;
203+
if entry.file_type().is_file()
204+
&& entry.path().extension().and_then(|e| e.to_str()) == Some("zstd")
205+
{
206+
let mut file = OpenOptions::new()
207+
.append(true)
208+
.open(entry.path())
209+
.with_context(|| format!("failed to open zstd file: {}", entry.path().display()))?;
210+
file.write_all(&ZSTD_EOF_BYTES)
211+
.with_context(|| format!("failed to append EOF bytes to: {}", entry.path().display()))?;
212+
}
213+
}
214+
log::info!("Fixed zstd EOF bytes");
215+
}
216+
150217
// create DB
151218

152219
let settings = default_settings().context("unable to create default settings")?;

0 commit comments

Comments
 (0)