Skip to content

Commit cd6470b

Browse files
committed
splitstream: Rework file format
This changes the splitstream format a bit. The primary differences are:

* The header is not compressed.
* All referenced fs-verity objects are stored in the header, including external chunks, mapped splitstreams and (a new feature) references that are not used in chunks.
* The mapping table is separate from the reference table (and generally smaller), and indexes into it.
* There is a magic value to detect the file format.
* We store a tag identifying which ObjectID format is used.
* The total size of the stream is stored in the header.

The ability to reference file objects in the repo even if they are not part of the splitstream "content" will be useful for the ostree support, which needs to reference file content objects.

This change also allows more efficient GC enumeration, because we don't have to parse the entire splitstream to find the referenced objects.
1 parent 0f8d2eb commit cd6470b

File tree

7 files changed

+200
-95
lines changed

7 files changed

+200
-95
lines changed

crates/composefs-http/src/lib.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ use tokio::task::JoinSet;
1515
use composefs::{
1616
fsverity::FsVerityHashValue,
1717
repository::Repository,
18-
splitstream::{DigestMapEntry, SplitStreamReader},
18+
splitstream::{SplitStreamReader},
1919
util::Sha256Digest,
2020
};
2121

@@ -107,7 +107,7 @@ impl<ObjectID: FsVerityHashValue> Downloader<ObjectID> {
107107

108108
// this part is fast: it only touches the header
109109
let mut reader = self.open_splitstream(&id)?;
110-
for DigestMapEntry { verity, body } in &reader.refs.map {
110+
for (body, verity) in reader.iter_mappings() {
111111
match splitstreams.insert(verity.clone(), Some(*body)) {
112112
// This is the (normal) case if we encounter a splitstream we didn't see yet...
113113
None => {

crates/composefs-oci/src/lib.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ pub fn open_config<ObjectID: FsVerityHashValue>(
8383
};
8484
let mut stream = repo.open_stream(name, Some(id))?;
8585
let config = ImageConfiguration::from_reader(&mut stream)?;
86-
Ok((config, stream.refs))
86+
Ok((config, stream.get_mappings()))
8787
}
8888

8989
fn hash(bytes: &[u8]) -> Sha256Digest {
@@ -121,7 +121,8 @@ pub fn write_config<ObjectID: FsVerityHashValue>(
121121
let json = config.to_string()?;
122122
let json_bytes = json.as_bytes();
123123
let sha256 = hash(json_bytes);
124-
let mut stream = repo.create_stream(Some(sha256), Some(refs));
124+
let mut stream = repo.create_stream(Some(sha256));
125+
stream.add_sha256_mappings(refs);
125126
stream.write_inline(json_bytes);
126127
let id = repo.write_stream(stream, None)?;
127128
Ok((sha256, id))

crates/composefs-oci/src/skopeo.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use rustix::process::geteuid;
1111
use tokio::{io::AsyncReadExt, sync::Semaphore};
1212

1313
use composefs::{
14-
fsverity::FsVerityHashValue, repository::Repository, splitstream::DigestMap, util::Sha256Digest,
14+
fsverity::FsVerityHashValue, repository::Repository, util::Sha256Digest,
1515
};
1616

1717
use crate::{sha256_from_descriptor, sha256_from_digest, tar::split_async, ContentAndVerity};
@@ -78,7 +78,7 @@ impl<ObjectID: FsVerityHashValue> ImageOp<ObjectID> {
7878
self.progress
7979
.println(format!("Fetching layer {}", hex::encode(layer_sha256)))?;
8080

81-
let mut splitstream = self.repo.create_stream(Some(layer_sha256), None);
81+
let mut splitstream = self.repo.create_stream(Some(layer_sha256));
8282
match descriptor.media_type() {
8383
MediaType::ImageLayer => {
8484
split_async(progress, &mut splitstream).await?;
@@ -155,15 +155,15 @@ impl<ObjectID: FsVerityHashValue> ImageOp<ObjectID> {
155155
entries.push((layer_sha256, future));
156156
}
157157

158+
let mut splitstream = self
159+
.repo
160+
.create_stream(Some(config_sha256));
161+
158162
// Collect the results.
159-
let mut config_maps = DigestMap::new();
160163
for (layer_sha256, future) in entries {
161-
config_maps.insert(&layer_sha256, &future.await??);
164+
splitstream.add_sha256_mapping(&layer_sha256, &future.await??);
162165
}
163166

164-
let mut splitstream = self
165-
.repo
166-
.create_stream(Some(config_sha256), Some(config_maps));
167167
splitstream.write_inline(&raw_config);
168168
let config_id = self.repo.write_stream(splitstream, None)?;
169169

crates/composefs/src/fsverity/hashvalue.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use core::{fmt, hash::Hash};
22

33
use hex::FromHexError;
44
use sha2::{digest::FixedOutputReset, digest::Output, Digest, Sha256, Sha512};
5+
use std::cmp::Ord;
56
use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout, Unaligned};
67

78
pub trait FsVerityHashValue
@@ -12,6 +13,7 @@ where
1213
Self: Hash + Eq,
1314
Self: fmt::Debug,
1415
Self: Send + Sync + Unpin + 'static,
16+
Self: PartialOrd + Ord,
1517
{
1618
type Digest: Digest + FixedOutputReset + fmt::Debug;
1719
const ALGORITHM: u8;
@@ -93,7 +95,7 @@ impl fmt::Debug for Sha512HashValue {
9395
}
9496
}
9597

96-
#[derive(Clone, Eq, FromBytes, Hash, Immutable, IntoBytes, KnownLayout, PartialEq, Unaligned)]
98+
#[derive(Clone, Eq, FromBytes, Hash, Immutable, IntoBytes, KnownLayout, PartialEq, Unaligned, PartialOrd, Ord)]
9799
#[repr(C)]
98100
pub struct Sha256HashValue([u8; 32]);
99101

@@ -110,7 +112,7 @@ impl FsVerityHashValue for Sha256HashValue {
110112
const ID: &str = "sha256";
111113
}
112114

113-
#[derive(Clone, Eq, FromBytes, Hash, Immutable, IntoBytes, KnownLayout, PartialEq, Unaligned)]
115+
#[derive(Clone, Eq, FromBytes, Hash, Immutable, IntoBytes, KnownLayout, PartialEq, Unaligned, PartialOrd, Ord)]
114116
#[repr(C)]
115117
pub struct Sha512HashValue([u8; 64]);
116118

crates/composefs/src/repository.rs

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ use crate::{
2424
compute_verity, enable_verity, ensure_verity_equal, measure_verity, FsVerityHashValue,
2525
},
2626
mount::mount_composefs_at,
27-
splitstream::{DigestMap, SplitStreamReader, SplitStreamWriter},
27+
splitstream::{SplitStreamReader, SplitStreamWriter},
2828
util::{filter_errno, proc_self_fd, replace_symlinkat, Sha256Digest},
2929
};
3030

@@ -185,9 +185,8 @@ impl<ObjectID: FsVerityHashValue> Repository<ObjectID> {
185185
pub fn create_stream(
186186
self: &Arc<Self>,
187187
sha256: Option<Sha256Digest>,
188-
maps: Option<DigestMap<ObjectID>>,
189188
) -> SplitStreamWriter<ObjectID> {
190-
SplitStreamWriter::new(self, maps, sha256)
189+
SplitStreamWriter::new(self, sha256)
191190
}
192191

193192
fn format_object_path(id: &ObjectID) -> String {
@@ -227,8 +226,8 @@ impl<ObjectID: FsVerityHashValue> Repository<ObjectID> {
227226
let mut split_stream = SplitStreamReader::new(File::from(stream))?;
228227

229228
// check the verity of all linked streams
230-
for entry in &split_stream.refs.map {
231-
if self.check_stream(&entry.body)?.as_ref() != Some(&entry.verity) {
229+
for (body, verity) in split_stream.iter_mappings() {
230+
if self.check_stream(body)?.as_ref() != Some(verity) {
232231
bail!("reference mismatch");
233232
}
234233
}
@@ -305,7 +304,7 @@ impl<ObjectID: FsVerityHashValue> Repository<ObjectID> {
305304
let object_id = match self.has_stream(sha256)? {
306305
Some(id) => id,
307306
None => {
308-
let mut writer = self.create_stream(Some(*sha256), None);
307+
let mut writer = self.create_stream(Some(*sha256));
309308
callback(&mut writer)?;
310309
let object_id = writer.done()?;
311310

0 commit comments

Comments
 (0)