Skip to content

Commit 425debc

Browse files
committed
add content-addressable crate
Signed-off-by: Dave Grantham <dwh@linuxprogrammer.org>
1 parent 81ddfda commit 425debc

File tree

23 files changed

+1654
-18
lines changed

23 files changed

+1654
-18
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ serde_json = { version = "1.0.104"}
5050
serde_test = { version = "1.0.104"}
5151
test-log = { version = "0.2.17", features = ["trace", "color"] }
5252
thiserror = "2.0.12"
53+
tokio = { version = "1.44.2", features = ["fs", "io-util", "macros", "rt", "test-util"] }
5354
tracing = "0.1.41"
5455
tracing-subscriber = { version = "0.3.19", features = ["env-filter"] }
5556
unsigned-varint = { version = "0.8.0", features = ["std"] }
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
[package]
2+
name = "content-addressable"
3+
version.workspace = true
4+
edition.workspace = true
5+
authors = ["Dave Grantham <dwg@linuxprogrammer.org>"]
6+
description = "Content addressable storage traits and implementations"
7+
readme = "README.md"
8+
license = "Apache-2.0"
9+
10+
[features]
11+
default = ["serde"]
12+
dag_cbor = ["serde_cbor", "serde_cbor/tags", "multicid/dag_cbor" ]
13+
14+
[dependencies]
15+
async-trait = "0.1.88"
16+
multibase.workspace = true
17+
multicid.workspace = true
18+
multicodec.workspace = true
19+
multihash.workspace = true
20+
multikey.workspace = true
21+
multitrait.workspace = true
22+
multiutil.workspace = true
23+
rand.workspace = true
24+
rand_core_6.workspace = true
25+
rng.workspace = true
26+
serde = { workspace = true, optional = true }
27+
serde_cbor = { workspace = true, optional = true }
28+
tempfile = "3.10.1"
29+
test-log.workspace = true
30+
thiserror.workspace = true
31+
tokio.workspace = true
32+
tracing.workspace = true
33+
34+
[dev-dependencies]
35+
hex.workspace = true
36+
rand.workspace = true
37+
rand_core_6.workspace = true
38+
rng.workspace = true
39+
serde_cbor.workspace = true
40+
serde_json.workspace = true
41+
serde_test.workspace = true
42+
43+
[lints]
44+
workspace = true
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
[![](https://img.shields.io/badge/made%20by-Cryptid%20Technologies-gold.svg?style=flat-square)][CRYPTID]
2+
[![](https://img.shields.io/badge/project-provenance-purple.svg?style=flat-square)][PROVENANCE]
3+
[![](https://img.shields.io/badge/project-multiformats-blue.svg?style=flat-square)][MULTIFORMATS]
4+
![](https://github.com/cryptidtech/multicid/actions/workflows/rust.yml/badge.svg)
5+
6+
# Content Addressable
7+
8+
A Rust implementation of content addressable storage abstractions using
9+
[multiformats][MULTIFORMATS] [content identifiers (CID)][CID] as the content
10+
address container.
11+
12+
## Current Status
13+
14+
This crate provides a set of abstractions for resolving CIDs into data blocks,
15+
VLADs into CIDs, and Multikeys into CIDs. Currently the only implementation
16+
uses the local file system for storage.
17+
18+
[CRYPTID]: https://cryptid.tech/
19+
[PROVENANCE]: https://github.com/cryptidtech/provenance-specifications/
20+
[MULTIFORMATS]: https://github.com/multiformats/multiformats/
21+
[CID]: https://docs.ipfs.tech/concepts/content-addressing/
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
use async_trait::async_trait;
3+
4+
/// A block of data paired with a key of type `K`, hiding how that key (e.g. a Cid) is calculated
5+
#[async_trait]
6+
pub trait Block<'a, K>: Send + Sync {
7+
/// Return a reference to the block's data bytes, borrowed for the lifetime `'a`
8+
async fn data(&'a self) -> &'a [u8];
9+
10+
/// Get the key (name) of the block, returned by value
11+
async fn key(&self) -> K;
12+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
use crate::Block;
3+
use async_trait::async_trait;
4+
5+
/// Abstract block storage trait for checking, getting, putting and removing content addressed data by key `K`
6+
#[async_trait]
7+
pub trait Blocks<'a, 'b, K> {
8+
/// The error type returned by every operation of this storage
9+
type Error;
10+
11+
/// Try to confirm a block exists under the given key
12+
async fn exists(&self, key: &K) -> Result<bool, Self::Error>;
13+
14+
/// Try to get a block from its content address
15+
async fn get(&self, key: &K) -> Result<impl Block<'b, K>, Self::Error>;
16+
17+
/// Try to put a block into storage; on success a key of type `K` is returned
18+
async fn put(&mut self, block: &'a impl Block<'a, K>) -> Result<K, Self::Error>;
19+
20+
/// Try to remove a block from storage; on success a block is returned
21+
async fn rm(&self, key: &K) -> Result<impl Block<'b, K>, Self::Error>;
22+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
use async_trait::async_trait;
3+
use multicid::Cid;
4+
5+
/// Abstract mapping from an arbitrary key of type `K` to a Cid
6+
#[async_trait]
7+
pub trait Cids<K> {
8+
/// The error type returned by every operation of this map
9+
type Error;
10+
11+
/// Try to confirm a key exists
12+
async fn exists(&self, key: &K) -> Result<bool, Self::Error>;
13+
14+
/// Try to get a Cid from its key
15+
async fn get(&self, key: &K) -> Result<Cid, Self::Error>;
16+
17+
/// Try to put a key and Cid into the map, returns the previous Cid value if it exists
18+
async fn put(&mut self, key: &K, cid: &Cid) -> Result<Option<Cid>, Self::Error>;
19+
20+
/// Try to remove a key and its Cid from the map; on success a Cid is returned
21+
async fn rm(&self, key: &K) -> Result<Cid, Self::Error>;
22+
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
use crate::fs;
3+
4+
/// Errors created by this library
5+
#[derive(Debug, thiserror::Error)]
6+
#[non_exhaustive]
7+
pub enum Error {
8+
/// A formatting error
9+
#[error(transparent)]
10+
Fmt(#[from] std::fmt::Error),
11+
/// An I/O error
12+
#[error(transparent)]
13+
Io(#[from] std::io::Error),
14+
/// A tempfile persist error
15+
#[error(transparent)]
16+
Persist(#[from] tempfile::PersistError),
17+
18+
/// A multicid error
19+
#[error(transparent)]
20+
Multicid(#[from] multicid::Error),
21+
/// A multicodec error
22+
#[error(transparent)]
23+
Multicodec(#[from] multicodec::Error),
24+
/// A multihash error
25+
#[error(transparent)]
26+
Multihash(#[from] multihash::Error),
27+
/// A multikey error
28+
#[error(transparent)]
29+
Multikey(#[from] multikey::Error),
30+
/// A multitrait error
31+
#[error(transparent)]
32+
Multitrait(#[from] multitrait::Error),
33+
/// A multiutil error
34+
#[error(transparent)]
35+
Multiutil(#[from] multiutil::Error),
36+
/// A filesystem (`fs`) error
37+
#[error(transparent)]
38+
Fs(#[from] fs::Error),
39+
40+
/// A custom error for callback functions, carrying a free-form message
41+
#[error("Custom error: {0}")]
42+
Custom(String),
43+
/// Wraps any other boxed error
44+
/// NOTE(review): the boxed error has no `Send + Sync` bound, so this enum is not `Send`; confirm that is intended for async callers
#[error(transparent)]
45+
Wrapped(#[from] Box<dyn std::error::Error>),
46+
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
3+
/// Filesystem backed data block
4+
pub mod block;
5+
pub use block::{Block, Builder as BlockBuilder};
6+
7+
/// Filesystem backed block storage
8+
pub mod blocks;
9+
pub use blocks::{Blocks, Builder as BlocksBuilder};
10+
11+
/// Filesystem errors
12+
pub mod error;
13+
pub use error::Error;
14+
15+
/// Abstract map to CIDs
16+
pub mod cidmap;
17+
pub use cidmap::CidMap;
18+
19+
/// Filesystem backed multikey to cid mapping
20+
pub mod mkmap;
21+
pub use mkmap::{Builder as MkMapBuilder, MkMap};
22+
23+
/// Generic content addressable storage
24+
pub mod storage;
25+
pub use storage::Storage;
26+
27+
/// Filesystem backed vlad to cid mapping
28+
pub mod vladmap;
29+
pub use vladmap::{Builder as VladMapBuilder, VladMap};
30+
31+
/*
32+
/// Filesystem backed multikey_map storage
33+
pub mod fsmultikey_map;
34+
pub use fsmultikey_map::FsMultikeyMap;
35+
36+
*/
37+
38+
/// Convenience module: a glob import of it brings in every public symbol of the parent module
39+
pub mod prelude {
40+
pub use super::*;
41+
}
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
use crate::{Block as TBlock, Error};
3+
use async_trait::async_trait;
4+
use multibase::Base;
5+
use multicid::{cid, Cid, EncodedCid};
6+
use multicodec::Codec;
7+
use multihash::mh;
8+
use multiutil::{BaseEncoder, DetectedEncoder, EncodingInfo};
9+
use std::marker::Unpin;
10+
use tokio::io::{AsyncRead, AsyncReadExt};
11+
12+
/// The hash function used when hashing block data to calculate a Cid
13+
pub const BLOCK_HASH: Codec = Codec::Blake2B256;
14+
15+
/// Filesystem stored block: the block data together with its encoded Cid
16+
#[derive(Clone, Debug, PartialEq)]
17+
pub struct Block {
18+
/// The block cid, in its base-encoded form
19+
pub key: EncodedCid,
20+
/// The block data
21+
pub data: Vec<u8>,
22+
}
23+
24+
#[async_trait]
25+
impl<'a> TBlock<'a, EncodedCid> for Block {
26+
/// Return a reference to the data held in the `data` field
27+
async fn data(&'a self) -> &'a [u8] {
28+
&self.data
29+
}
30+
31+
/// Get the name of the block: a clone of the `key` field
32+
async fn key(&self) -> EncodedCid {
33+
self.key.clone()
34+
}
35+
}
36+
37+
/// Builder for creating blocks from asynchronous readers
38+
#[derive(Clone, Debug)]
39+
pub struct Builder<R: AsyncRead> {
40+
/// The base encoding, if one was set
41+
base: Option<Base>,
42+
/// The block cid, if one was set
43+
cid: Option<Cid>,
44+
/// The reader for the block data
45+
reader: R,
46+
}
47+
48+
impl<R: AsyncRead + Unpin> Builder<R> {
49+
/// Create a new block builder that reads the block data from `reader`
50+
pub fn new(reader: R) -> Self {
51+
Self {
52+
base: None,
53+
cid: None,
54+
reader,
55+
}
56+
}
57+
58+
/// Set the base encoding used for the block key
59+
pub fn base(mut self, base: Base) -> Self {
60+
self.base = Some(base);
61+
self
62+
}
63+
64+
/// Set the cid, which skips calculating one from the data
65+
pub fn cid(mut self, cid: Cid) -> Self {
66+
self.cid = Some(cid);
67+
self
68+
}
69+
70+
/// Build the block by reading the reader to its end.
///
/// If no cid was set, one is calculated as a Cidv1 targeting DagCbor over a
/// `BLOCK_HASH` multihash of the data. A cid set via `cid()` is used as-is and
/// is not checked against the data read here.
///
/// # Errors
///
/// Returns an error if reading fails or if building the multihash or cid fails.
71+
pub async fn try_build(mut self) -> Result<Block, Error> {
72+
// get the base encoding; the fallback is only computed when no base was set
73+
let base = self.base.unwrap_or_else(|| {
74+
DetectedEncoder::preferred_encoding(Cid::preferred_encoding())
75+
});
76+
77+
// read the data from the reader
78+
let mut data = Vec::new();
79+
self.reader.read_to_end(&mut data).await?;
80+
81+
// calculate the cid from the data if not provided
82+
let cid = match self.cid {
83+
Some(cid) => cid,
84+
None => cid::Builder::new(Codec::Cidv1)
85+
.with_target_codec(Codec::DagCbor)
86+
.with_hash(&mh::Builder::new_from_bytes(BLOCK_HASH, &data)?.try_build()?)
87+
.try_build()?,
88+
};
89+
90+
// encode the cid into the key
91+
let key = EncodedCid::new(base, cid);
92+
93+
Ok(Block { key, data })
94+
}
95+
}

0 commit comments

Comments
 (0)