Skip to content

Commit 87a9bd7

Browse files
committed
repository: Add new "blob" feature
This is sort of similar to splitstream, except it's just a blob of data with references, rather than something intended to be spliced together from objects. The short-term use cases are ostree commit data, and object mappings, but it is flexible enough to be useful for other things too. Signed-off-by: Alexander Larsson <[email protected]>
1 parent 9e41509 commit 87a9bd7

File tree

5 files changed

+217
-8
lines changed

5 files changed

+217
-8
lines changed

crates/composefs/src/blob.rs

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
/* Implementation of the blob file format
2+
*
3+
* See doc/blobs.md
4+
*/
5+
6+
use std::{
7+
io::{Read, Write},
8+
sync::Arc,
9+
};
10+
11+
use anyhow::{bail, Result};
12+
13+
use crate::{fsverity::FsVerityHashValue, repository::Repository};
14+
15+
const BLOB_MAGIC_V1: u64 = 0xAFE138C18C463EF3;
16+
17+
#[derive(Debug)]
18+
pub struct BlobWriter<ObjectID: FsVerityHashValue> {
19+
repo: Arc<Repository<ObjectID>>,
20+
pub refs: Vec<ObjectID>,
21+
content: Vec<u8>,
22+
}
23+
24+
impl<ObjectID: FsVerityHashValue> BlobWriter<ObjectID> {
25+
pub fn new(repo: &Arc<Repository<ObjectID>>) -> Self {
26+
Self {
27+
repo: Arc::clone(repo),
28+
content: vec![],
29+
refs: vec![],
30+
}
31+
}
32+
33+
pub fn add_reference(&mut self, reference: &ObjectID) {
34+
self.refs.push(reference.clone())
35+
}
36+
37+
pub fn done(&self) -> Result<ObjectID> {
38+
let mut res = Vec::<u8>::new();
39+
res.extend_from_slice(&u64::to_le_bytes(BLOB_MAGIC_V1));
40+
res.extend_from_slice(&u64::to_le_bytes(self.refs.len() as u64));
41+
for obj_id in self.refs.iter() {
42+
res.extend_from_slice(obj_id.as_bytes());
43+
}
44+
res.extend_from_slice(&self.content);
45+
46+
self.repo.ensure_object(&res)
47+
}
48+
}
49+
50+
impl<ObjectID: FsVerityHashValue> Write for BlobWriter<ObjectID> {
    /// Appends all of `buf` to the in-memory payload and reports it as fully written.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let accepted = buf.len();
        self.content.extend_from_slice(buf);
        Ok(accepted)
    }

    /// Nothing to flush: the payload lives in memory until `done()` is called.
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}
60+
61+
#[derive(Debug)]
62+
pub struct BlobReader<R: Read, ObjectID: FsVerityHashValue> {
63+
reader: R,
64+
pub refs: Vec<ObjectID>,
65+
}
66+
67+
impl<R: Read, ObjectID: FsVerityHashValue> BlobReader<R, ObjectID> {
68+
pub fn new(mut reader: R) -> Result<Self> {
69+
let magic = {
70+
let mut buf = [0u8; 8];
71+
reader.read_exact(&mut buf)?;
72+
u64::from_le_bytes(buf)
73+
};
74+
if magic != BLOB_MAGIC_V1 {
75+
bail!("Invalida blob header magic value");
76+
}
77+
78+
let n_refs = {
79+
let mut buf = [0u8; 8];
80+
reader.read_exact(&mut buf)?;
81+
u64::from_le_bytes(buf)
82+
} as usize;
83+
84+
let mut refs = Vec::with_capacity(n_refs);
85+
for _ in 0..n_refs {
86+
refs.push(ObjectID::read_from_io(&mut reader)?);
87+
}
88+
89+
Ok(Self { reader, refs })
90+
}
91+
}
92+
93+
impl<F: Read, ObjectID: FsVerityHashValue> Read for BlobReader<F, ObjectID> {
    /// Forwards the read to the wrapped reader.
    ///
    /// `BlobReader::new` has already consumed the header from that reader, so reads
    /// continue from wherever it left off.
    fn read(&mut self, data: &mut [u8]) -> std::io::Result<usize> {
        let Self { reader, .. } = self;
        reader.read(data)
    }
}

crates/composefs/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
pub mod blob;
12
pub mod dumpfile;
23
pub mod dumpfile_parse;
34
pub mod erofs;

crates/composefs/src/repository.rs

Lines changed: 67 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,15 @@ use once_cell::sync::OnceCell;
1313
use rand::{distr::Alphanumeric, Rng};
1414
use rustix::{
1515
fs::{
16-
fdatasync, flock, linkat, mkdirat, open, openat, readlinkat, symlinkat, AtFlags, Dir,
17-
FileType, FlockOperation, Mode, OFlags, CWD,
16+
fdatasync, flock, linkat, mkdirat, open, openat, readlinkat, renameat, symlinkat, AtFlags,
17+
Dir, FileType, FlockOperation, Mode, OFlags, CWD,
1818
},
1919
io::{Errno, Result as ErrnoResult},
2020
};
2121
use sha2::{Digest, Sha256};
2222

2323
use crate::{
24+
blob::{BlobReader, BlobWriter},
2425
fsverity::{
2526
compute_verity, enable_verity, ensure_verity_equal, measure_verity, FsVerityHashValue,
2627
},
@@ -362,6 +363,59 @@ impl<ObjectID: FsVerityHashValue> Repository<ObjectID> {
362363
Ok(())
363364
}
364365

366+
pub fn has_named_blob(&self, name: &str) -> bool {
367+
let blob_path = format!("blobs/refs/{}", name);
368+
369+
match readlinkat(&self.repository, &blob_path, []) {
370+
Ok(_) => true,
371+
Err(_) => false,
372+
}
373+
}
374+
375+
/// Creates a Blobriter for writing a blob.
376+
/// You should write the data to the returned object and then pass it to .store_blob() to
377+
/// store the result.
378+
pub fn create_blob(self: &Arc<Self>) -> BlobWriter<ObjectID> {
379+
BlobWriter::new(self)
380+
}
381+
382+
/// Stores the blob accumulated in `writer` and returns its object ID.
///
/// The writer is finalized with `done()`, then `blobs/{hex id}` is ensured to be a
/// symlink to the object's path. When `name` is given, `blobs/refs/{name}` is also
/// linked to that blob entry.
pub fn write_blob(&self, writer: BlobWriter<ObjectID>, name: Option<&str>) -> Result<ObjectID> {
    let object_id = writer.done()?;

    let blob_path = format!("blobs/{}", object_id.to_hex());
    self.ensure_symlink(&blob_path, &Self::format_object_path(&object_id))?;

    // Naming is optional; `name_blob` creates the same `blobs/refs/{name}` link.
    if let Some(reference) = name {
        self.name_blob(&object_id, reference)?;
    }

    Ok(object_id)
}
397+
398+
/// Links the reference `blobs/refs/{name}` to the blob entry for `object_id`.
///
/// Both paths are handed to the repository's `symlink` helper; its error, if any, is
/// propagated.
pub fn name_blob(&self, object_id: &ObjectID, name: &str) -> Result<()> {
    let hex_id = object_id.to_hex();
    let target = format!("blobs/{hex_id}");
    let link = format!("blobs/refs/{name}");

    self.symlink(&link, &target)?;
    Ok(())
}
404+
405+
pub fn open_blob(&self, name: &str) -> Result<BlobReader<File, ObjectID>> {
406+
let fd = self
407+
.openat(&format!("blobs/{name}"), OFlags::RDONLY)
408+
.with_context(|| format!("Opening ref 'blobs/{name}'"))?;
409+
410+
if !name.contains("/") {
411+
// A name with no slashes in it is taken to be a sha256 fs-verity digest
412+
ensure_verity_equal(&fd, &ObjectID::from_hex(name)?)?;
413+
}
414+
415+
let file = File::from(fd);
416+
BlobReader::new(file)
417+
}
418+
365419
/// this function is not safe for untrusted users
366420
pub fn write_image(&self, name: Option<&str>, data: &[u8]) -> Result<ObjectID> {
367421
let object_id = self.ensure_object(data)?;
@@ -593,6 +647,17 @@ impl<ObjectID: FsVerityHashValue> Repository<ObjectID> {
593647
})?;
594648
}
595649

650+
for object in self.gc_category("blobs")? {
651+
println!("{object:?} lives as a blob");
652+
objects.insert(object.clone());
653+
654+
let blob = self.open_blob(&object.to_hex())?;
655+
for reference in blob.refs {
656+
println!(" with {reference:?}");
657+
objects.insert(reference.clone());
658+
}
659+
}
660+
596661
for first_byte in 0x0..=0xff {
597662
let dirfd = match self.openat(
598663
&format!("objects/{first_byte:02x}"),

doc/blobs.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Blobs
2+
3+
Blobs are a way to reference generic data objects other than images in
4+
the repository. The blobs are a way to reference data objects, which
5+
otherwise would be garbage collected.
6+
7+
Additionally, the blob file format allows the blob to reference a
8+
list of other objects, that will also be kept alive. Other than this
9+
the blob format itself is without structure, and it is up to each user
10+
to define this.
11+
12+
Similar to images, blobs can also be given names, which makes it
13+
easy to find them.
14+
15+
## Example uses
16+
17+
Blobs are used by composefs-ostree to store native ostree
18+
representations of ostree commits that can be converted to
19+
images, but also used as a source for previous data when
20+
updating an ostree commit.
21+
22+
## File format
23+
24+
The file format consists of a header, plus one data block for the
25+
remainder of the file.
26+
27+
### Header
28+
29+
The file starts with a magic number, 0xAFE138C18C463EF3 as a u64 in
30+
little endian. After that comes a single u64 le integer which
31+
specifies the number of references to other objects. Directly after that,
32+
the referenced objects are stored by fsverity hash value (32/64
33+
bytes).
34+
35+
The remainder of the file is of unspecified format.

doc/repository.md

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ composefs
4040
│   ├── 4e67eaccd9fd[...] -> ../objects/4e/67eaccd9fd[...]
4141
│   └── refs
4242
│   └── some/name -> ../../images/4e67eaccd9fd[...]
43+
├── blobs
44+
│   ├── bfa4c6a073a1[...] -> ../objects/bf/a4c6a073a1[...]
45+
│   └── refs
46+
│   └── some/name -> ../../blobs/bfa4c6a073a1[...]
4347
└── streams
4448
├── 502b126bca0c[...] -> ../objects/50/2b126bca0c[...]
4549
└── refs
@@ -86,10 +90,17 @@ repository as a splitstream, the resulting filename in this directory will have
8690
no relation to the original content. You can, however, store a reference for
8791
it.
8892

89-
## `{images,streams}/refs/`
93+
## `blobs/`
94+
95+
This is where [blobs](blobs.md) are stored. As for the images,
96+
this is a bunch of 256bit symlinks which are symlinks to data in the object
97+
storage.
98+
99+
100+
## `{images,streams,blobs}/refs/`
90101

91-
This is where we record which images and streams are currently "requested" by
92-
some external user. When importing a tar file, in addition to creating the
102+
This is where we record which images, blobs and streams are currently "requested"
103+
by some external user. When importing e.g. a tar file, in addition to creating the
93104
file in the objects database and the toplevel symlink in the `streams/`
94105
directory, we also assign it a name which is chosen by the software which is
95106
performing the import.
@@ -126,9 +137,9 @@ prevent users from corrupting the layout of the repository. The reason for the
126137
acl is that read-only operations on the repository should be performed
127138
directly on the repository and not via some central agent.
128139

129-
## Referring to images and streams
140+
## Referring to images, blobs and streams
130141

131-
Operations that are performed on images or streams (mount, cat, etc.) name the
142+
Operations that are performed on images, blobs or streams (mount, cat, etc.) name the
132143
stream in one of two ways:
133144

134145
- via the user-chosen name such as `refs/1000/flatpak/some_id`
@@ -137,7 +148,7 @@ stream in one of two ways:
137148
ie: the name must either start with the string `refs/`, or must be a 64bit
138149
character hexadecimal string.
139150

140-
In both cases, the name is a path relative to the `images/` or `streams/`
151+
In both cases, the name is a path relative to the `images/`, `blobs/` or `streams/`
141152
directory and this path contains a symlink (either direct or indirect) to the
142153
underlying file in `objects/`.
143154

0 commit comments

Comments
 (0)