Skip to content

Commit 73bdcbd

Browse files
committed
chunking implemented
1 parent 91a07ce commit 73bdcbd

File tree

5 files changed

+258
-11
lines changed

5 files changed

+258
-11
lines changed

Cargo.lock

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,5 @@ reqwest = "0.12.23"
1313
serde = { version = "1.0.219", features = ["derive"] }
1414
serde_yaml = "0.9.34"
1515
tar = "0.4.44"
16+
temp-dir = "0.1.16"
1617
walkdir = "2.5.0"

src/chunks/hash.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
/// Hash algorithm used to compute a chunk's content hash.
///
/// WARNING: Only [`HashKind::Blake3`] is implemented so far; the other
/// variants are placeholders.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HashKind {
    /// BLAKE3, the only algorithm that is implemented.
    Blake3,
    /// SHA-512 (not implemented yet).
    Sha512,
    /// SHA-256 (not implemented yet).
    Sha256,
}
7+
8+
pub fn hash(hash_kind: &HashKind, data: &[u8]) -> String {
9+
match hash_kind {
10+
HashKind::Blake3 => blake3::hash(data).to_hex().to_string(),
11+
HashKind::Sha512 => "".to_string(),
12+
HashKind::Sha256 => "".to_string(),
13+
}
14+
}

src/chunks/mod.rs

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,22 @@
1+
mod hash;
12
mod tree;
23

4+
use std::path::PathBuf;
5+
6+
pub use hash::HashKind;
37
pub use tree::*;
48

5-
#[derive(serde::Deserialize, serde::Serialize)]
6-
struct Chunk {
9+
#[derive(serde::Deserialize, serde::Serialize, Debug, Clone)]
10+
pub struct Chunk {
11+
/// Path of the file, relative to the root of the tree
12+
path: PathBuf,
13+
714
/// Hash
8-
pub hash: String,
15+
hash: String,
916

1017
/// Unix mode permissions
1118
permissions: u32,
1219

13-
/// Expected size in kilobytes
14-
pub size: i64,
20+
/// Expected size in whole kilobytes, rounded down.
21+
size: u64,
1522
}

src/chunks/tree.rs

Lines changed: 224 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,231 @@
1-
use std::path::Path;
1+
use anyhow::{Context, Result};
2+
use std::{fs, os::unix::fs::PermissionsExt, path::Path};
3+
use walkdir::WalkDir;
24

3-
use anyhow::Result;
5+
use crate::chunks::{Chunk, HashKind, hash::hash};
46

5-
use crate::chunks::Chunk;
7+
pub fn save_tree(
8+
tree_path: &Path,
9+
chunk_store_path: &Path,
10+
hash_kind: &HashKind,
11+
) -> Result<Vec<Chunk>> {
12+
let mut chunks = Vec::new();
613

7-
pub fn save_tree(load_path: &Path, chunk_store_path: &Path) -> Result<Vec<Chunk>> {
8-
Ok(Vec::new())
14+
for entry in WalkDir::new(tree_path) {
15+
let file = entry?;
16+
17+
if !file.file_type().is_file() {
18+
continue;
19+
}
20+
21+
let path = file.path().strip_prefix(tree_path)?.to_path_buf();
22+
let contents = fs::read(file.path())?;
23+
let size = (contents.len() as u64) / 1024;
24+
let hash = hash(hash_kind, &contents);
25+
let mode = file.metadata()?.permissions().mode() & 0o777;
26+
27+
// TODO: Make this hardlink if on the same filesystem.
28+
fs::write(
29+
chunk_store_path.join(get_chunk_filename(&hash, mode)),
30+
contents,
31+
)?;
32+
33+
chunks.push(Chunk {
34+
hash,
35+
path,
36+
size,
37+
permissions: mode,
38+
});
39+
}
40+
41+
Ok(chunks)
942
}
1043

11-
pub fn load_tree(load_path: &Path, chunk_store_path: &Path, chunks: &Vec<Chunk>) -> Result<()> {
44+
/// Turns a tree into a list of chunks
45+
pub fn load_tree(load_path: &Path, chunk_store_path: &Path, chunks: &[Chunk]) -> Result<()> {
46+
for chunk in chunks {
47+
let extracted_path = load_path.join(&chunk.path);
48+
let chunk_path = chunk_store_path.join(get_chunk_filename(&chunk.hash, chunk.permissions));
49+
50+
// Create parent path
51+
if let Some(parent) = extracted_path.parent() {
52+
fs::create_dir_all(parent)?;
53+
}
54+
55+
if fs::hard_link(&chunk_path, &extracted_path).is_err() {
56+
fs::copy(&chunk_path, &extracted_path)
57+
.with_context(|| "Could not copy data while extracting")?;
58+
};
59+
60+
let mut perms = fs::metadata(&extracted_path)?.permissions();
61+
perms.set_mode(chunk.permissions & 0o777);
62+
fs::set_permissions(&extracted_path, perms)?;
63+
}
64+
1265
Ok(())
1366
}
67+
68+
/// Returns the tree's estimated size in kilobytes.
69+
pub fn estimate_tree_size(chunks: &[Chunk]) -> u64 {
70+
let mut size: u64 = 0;
71+
72+
chunks.iter().for_each(|chunk| size += chunk.size);
73+
74+
size
75+
}
76+
77+
/// Builds the file name a chunk is stored under: the hash immediately followed
/// by the permission bits written in decimal (so `0o777` appends `511`).
fn get_chunk_filename(hash: &str, permissions: u32) -> String {
    format!("{hash}{permissions}")
}
84+
85+
#[cfg(test)]
86+
mod tests {
87+
use std::os::unix::fs::MetadataExt;
88+
89+
use super::*;
90+
91+
use temp_dir::TempDir;
92+
93+
#[test]
94+
fn get_chunk_filename_stability() {
95+
let hash = "a8sf799a8s6fa7f5";
96+
let permissions = 0o777;
97+
98+
assert_eq!(get_chunk_filename(hash, permissions), "a8sf799a8s6fa7f5511");
99+
}
100+
101+
#[test]
102+
fn test_save_tree() -> Result<()> {
103+
let initial_tree_path = TempDir::new()?;
104+
let chunk_store_path = TempDir::new()?;
105+
let hash_kind = &HashKind::Blake3;
106+
107+
// Create example tree
108+
fs::write(initial_tree_path.path().join("file"), "Example")?;
109+
fs::create_dir(initial_tree_path.path().join("path"))?;
110+
fs::write(initial_tree_path.path().join("path/file"), "Example2")?;
111+
112+
let chunks = save_tree(initial_tree_path.path(), chunk_store_path.path(), hash_kind)?;
113+
114+
// Check that the correct number of chunks were created
115+
assert_eq!(chunks.len(), 2);
116+
117+
// Check that the chunk hashes exist in the chunk store
118+
for chunk in &chunks {
119+
let chunk_path = chunk_store_path
120+
.path()
121+
.join(get_chunk_filename(&chunk.hash, chunk.permissions));
122+
assert!(
123+
chunk_path.exists(),
124+
"Chunk file does not exist: {:?}",
125+
chunk_path
126+
);
127+
}
128+
129+
// Check that the chunk paths are correct
130+
let chunk_paths: Vec<_> = chunks
131+
.iter()
132+
.map(|c| c.path.to_string_lossy().to_string())
133+
.collect();
134+
assert!(chunk_paths.contains(&"file".to_string()));
135+
assert!(chunk_paths.contains(&"path/file".to_string()));
136+
137+
// Check that the estimated size is correct (in KB)
138+
let expected_size = (b"Example".len() as u64) / 1024 + (b"Example2".len() as u64) / 1024;
139+
assert_eq!(estimate_tree_size(&chunks), expected_size);
140+
141+
Ok(())
142+
}
143+
144+
#[test]
145+
fn test_load_tree() -> Result<()> {
146+
let initial_tree_path = TempDir::new()?;
147+
let loaded_tree_path = TempDir::new()?;
148+
let chunk_store_path = TempDir::new()?;
149+
let hash_kind = &HashKind::Blake3;
150+
151+
// Create example tree
152+
fs::write(initial_tree_path.path().join("file"), "Example")?;
153+
fs::create_dir(initial_tree_path.path().join("path"))?;
154+
fs::write(initial_tree_path.path().join("path/file"), "Example2")?;
155+
156+
let chunks = save_tree(initial_tree_path.path(), chunk_store_path.path(), hash_kind)?;
157+
158+
load_tree(loaded_tree_path.path(), chunk_store_path.path(), &chunks)?;
159+
160+
assert_eq!(
161+
fs::read_to_string(loaded_tree_path.path().join("file"))?,
162+
"Example"
163+
);
164+
165+
assert_eq!(
166+
fs::read_to_string(loaded_tree_path.path().join("path/file"))?,
167+
"Example2"
168+
);
169+
170+
Ok(())
171+
}
172+
173+
#[test]
174+
fn test_permissions() -> Result<()> {
175+
let initial_tree_path = TempDir::new()?;
176+
let loaded_tree_path = TempDir::new()?;
177+
let chunk_store_path = TempDir::new()?;
178+
let hash_kind = &HashKind::Blake3;
179+
180+
// Create example tree
181+
let file_path = initial_tree_path.path().join("file");
182+
fs::write(&file_path, "Example")?;
183+
let mut perms = fs::metadata(&file_path)?.permissions();
184+
perms.set_mode(0o700);
185+
fs::set_permissions(&file_path, perms)?;
186+
187+
let file_path = initial_tree_path.path().join("file2");
188+
fs::write(&file_path, "Example")?;
189+
let mut perms2 = fs::metadata(&file_path)?.permissions();
190+
perms2.set_mode(0o600);
191+
fs::set_permissions(&file_path, perms2)?;
192+
193+
let chunks = save_tree(initial_tree_path.path(), chunk_store_path.path(), hash_kind)?;
194+
195+
load_tree(loaded_tree_path.path(), chunk_store_path.path(), &chunks)?;
196+
197+
assert_eq!(
198+
fs::metadata(loaded_tree_path.path().join("file"))?.mode() & 0o777,
199+
0o700
200+
);
201+
202+
assert_eq!(
203+
fs::metadata(loaded_tree_path.path().join("file2"))?.mode() & 0o777,
204+
0o600
205+
);
206+
207+
Ok(())
208+
}
209+
210+
#[test]
211+
fn test_tree_size() -> Result<()> {
212+
let initial_tree_path = TempDir::new()?;
213+
let chunk_store_path = TempDir::new()?;
214+
let hash_kind = &HashKind::Blake3;
215+
216+
let kb1 = vec![0; 1024];
217+
let kb4 = vec![0; 4096];
218+
219+
// Create example tree
220+
fs::write(initial_tree_path.path().join("file"), kb1)?;
221+
fs::create_dir(initial_tree_path.path().join("path"))?;
222+
fs::write(initial_tree_path.path().join("path/file"), kb4)?;
223+
224+
let chunks = save_tree(initial_tree_path.path(), chunk_store_path.path(), hash_kind)?;
225+
226+
// Check that the estimated size is correct (in KB)
227+
assert_eq!(estimate_tree_size(&chunks), 5);
228+
229+
Ok(())
230+
}
231+
}

0 commit comments

Comments
 (0)