Skip to content

Commit 3503796

Browse files
authored
Use canonical JSON for action hashing (#80)
* Hash actions via canonical JSON * Stream canonical JSON into action hashes
1 parent 56d5aec commit 3503796

File tree

7 files changed

+97
-104
lines changed

7 files changed

+97
-104
lines changed

Cargo.lock

Lines changed: 18 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ itertools = "0.12"
1717
tracing = "0.1"
1818
tracing-subscriber = { version = "0.3", features = ["fmt"] }
1919
serde_json = "1"
20+
serde_json_canonicalizer = "0.3"
2021
tempfile = "3.8.0"
2122

2223
[lints.clippy]

docs/netsuke-design.md

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -913,7 +913,8 @@ use std::path::{Path, PathBuf};
913913
/// The complete, static build graph.
914914
pub struct BuildGraph {
915915
/// A map of all unique actions (rules) in the build.
916-
/// The key is a hash of the action's properties to enable deduplication.
916+
/// The key is a hash of a canonical JSON serialisation of the action's
917+
/// properties to enable deduplication.
917918
pub actions: HashMap<String, Action>,
918919
919920
/// A map of all target files to be built. The key is the output path.
@@ -1109,9 +1110,10 @@ action identifier and carries the `phony` and `always` flags verbatim from the
11091110
manifest. No Ninja specific placeholders are stored in the IR to keep the
11101111
representation portable.
11111112

1112-
- Actions are deduplicated using a SHA-256 hash of their recipe and metadata.
1113-
Identical commands therefore share the same identifier which keeps the IR
1114-
deterministic for snapshot tests.
1113+
- Actions are deduplicated using a SHA-256 hash of a canonical JSON
1114+
serialisation of their recipe and metadata. Identical commands therefore
1115+
share the same identifier which keeps the IR deterministic for snapshot
1116+
tests.
11151117
- Multiple rule references in a single target are not yet supported. The IR
11161118
generator reports `IrGenError::MultipleRules` when encountered.
11171119
- Duplicate output files are rejected. Attempting to define the same output
@@ -1261,29 +1263,27 @@ libraries.[^27]
12611263
Rust
12621264

12631265
```rust
1264-
// In src/ir.rs
1265-
use thiserror::Error;
1266-
use std::path::PathBuf;
1266+
// In src/ir.rs use thiserror::Error; use std::path::PathBuf;
12671267

12681268
#[derive(Debug, Error)]
12691269
pub enum IrGenError {
1270-
#[error("rule not found: {rule_name} for target {target_name}")]
1271-
RuleNotFound {
1272-
target_name: String,
1273-
rule_name: String,
1274-
},
1270+
#[error("rule '{rule_name}' referenced by target '{target_name}' was not found")]
1271+
RuleNotFound { target_name: String, rule_name: String },
1272+
1273+
#[error("multiple rules for target '{target_name}': {rules:?}")]
1274+
MultipleRules { target_name: String, rules: Vec<String> },
1275+
1276+
#[error("No rules specified for target {target_name}")]
1277+
EmptyRule { target_name: String },
1278+
1279+
#[error("duplicate target outputs: {outputs:?}")]
1280+
DuplicateOutput { outputs: Vec<String> },
12751281

12761282
#[error("circular dependency detected: {cycle:?}")]
1277-
CircularDependency {
1278-
cycle: Vec<PathBuf>,
1279-
},
1280-
1281-
#[error("dependency not found: {dependency_name} for target {target_name}")]
1282-
DependencyNotFound {
1283-
target_name: String,
1284-
dependency_name: String,
1285-
},
1286-
}
1283+
CircularDependency { cycle: Vec<PathBuf> },
1284+
1285+
#[error("failed to serialise action: {0}")]
1286+
ActionSerialisation(#[from] serde_json::Error), }
12871287
```
12881288

12891289
- `anyhow`: This crate will be used in the main application logic (`main.rs`)

src/hasher.rs

Lines changed: 28 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
//! Action hashing utilities.
22
//!
3-
//! This module provides the [`ActionHasher`] type used to compute a stable
4-
//! SHA-256 digest for [`Action`] definitions. The hash is used to deduplicate
5-
//! identical actions when generating the build graph.
3+
//! [`ActionHasher`] computes stable SHA-256 digests for [`Action`] definitions.
4+
//! Each action is serialised to canonical JSON before hashing, ensuring
5+
//! identical actions map to the same digest even as the struct evolves.
66
//!
77
//! # Examples
88
//!
@@ -23,80 +23,43 @@
2323
//! assert!(!hash.is_empty());
2424
//! ```
2525
26-
use itoa::Buffer;
2726
use sha2::{Digest, Sha256};
2827

29-
use crate::ast::{Recipe, StringOrList};
3028
use crate::ir::Action;
29+
use serde_json_canonicalizer::to_writer;
30+
use std::io::{self, Write};
3131

3232
/// Computes stable digests for [`Action`] definitions.
3333
pub struct ActionHasher;
3434

35-
impl ActionHasher {
36-
/// Calculate the hash of an [`Action`].
37-
#[must_use]
38-
pub fn hash(action: &Action) -> String {
39-
let mut hasher = Sha256::new();
40-
Self::hash_recipe(&mut hasher, &action.recipe);
41-
Self::hash_optional_fields(&mut hasher, action);
42-
format!("{:x}", hasher.finalize())
43-
}
44-
45-
fn hash_recipe(hasher: &mut Sha256, recipe: &Recipe) {
46-
match recipe {
47-
Recipe::Command { command } => {
48-
hasher.update(b"cmd");
49-
Self::update_with_len(hasher, command.as_bytes());
50-
}
51-
Recipe::Script { script } => {
52-
hasher.update(b"scr");
53-
Self::update_with_len(hasher, script.as_bytes());
54-
}
55-
Recipe::Rule { rule } => {
56-
hasher.update(b"rule");
57-
Self::hash_rule_reference(hasher, rule);
58-
}
59-
}
60-
}
35+
struct DigestWriter<'a, D: Digest>(&'a mut D);
6136

62-
fn hash_optional_fields(hasher: &mut Sha256, action: &Action) {
63-
Self::hash_optional_string(hasher, action.description.as_ref());
64-
Self::hash_optional_string(hasher, action.depfile.as_ref());
65-
Self::hash_optional_string(hasher, action.deps_format.as_ref());
66-
Self::hash_optional_string(hasher, action.pool.as_ref());
67-
hasher.update(if action.restat { b"1" } else { b"0" });
37+
impl<D: Digest> Write for DigestWriter<'_, D> {
38+
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
39+
self.0.update(buf);
40+
Ok(buf.len())
6841
}
6942

70-
fn hash_rule_reference(hasher: &mut Sha256, rule: &StringOrList) {
71-
match rule {
72-
StringOrList::String(r) => Self::update_with_len(hasher, r.as_bytes()),
73-
StringOrList::List(list) => {
74-
// Preserve the original sequence so that different orders
75-
// generate distinct hashes.
76-
for r in list {
77-
Self::update_with_len(hasher, r.as_bytes());
78-
}
79-
}
80-
StringOrList::Empty => {}
81-
}
43+
fn flush(&mut self) -> io::Result<()> {
44+
Ok(())
8245
}
46+
}
8347

84-
fn hash_optional_string(hasher: &mut Sha256, value: Option<&String>) {
85-
match value {
86-
Some(v) => {
87-
hasher.update(b"1");
88-
Self::update_with_len(hasher, v.as_bytes());
89-
}
90-
None => hasher.update(b"0"),
48+
impl ActionHasher {
49+
/// Calculate the hash of an [`Action`].
50+
///
51+
/// Returns a lowercase hex-encoded SHA-256 of the action's canonical JSON.
52+
///
53+
/// # Errors
54+
///
55+
/// Returns an error if the action cannot be serialised to JSON.
56+
pub fn hash(action: &Action) -> Result<String, serde_json::Error> {
57+
let mut hasher = Sha256::new();
58+
{
59+
// Canonical JSON: compact formatting with sorted keys.
60+
let mut writer = DigestWriter(&mut hasher);
61+
to_writer(action, &mut writer)?;
9162
}
92-
}
93-
94-
fn update_with_len(hasher: &mut Sha256, bytes: &[u8]) {
95-
// Write the length prefix into a stack buffer to avoid heap allocation.
96-
let mut buf = Buffer::new();
97-
let len_str = buf.format(bytes.len());
98-
hasher.update(len_str.as_bytes());
99-
hasher.update(b":");
100-
hasher.update(bytes);
63+
Ok(format!("{:x}", hasher.finalize()))
10164
}
10265
}

src/ir.rs

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -39,12 +39,18 @@ pub struct BuildGraph {
3939
}
4040

4141
/// A reusable command analogous to a Ninja rule.
42-
#[derive(Debug, Clone, PartialEq)]
42+
use serde::Serialize;
43+
44+
#[derive(Debug, Clone, PartialEq, Serialize)]
4345
pub struct Action {
4446
pub recipe: Recipe,
47+
#[serde(skip_serializing_if = "Option::is_none")]
4548
pub description: Option<String>,
49+
#[serde(skip_serializing_if = "Option::is_none")]
4650
pub depfile: Option<String>,
51+
#[serde(skip_serializing_if = "Option::is_none")]
4752
pub deps_format: Option<String>,
53+
#[serde(skip_serializing_if = "Option::is_none")]
4854
pub pool: Option<String>,
4955
pub restat: bool,
5056
}
@@ -96,6 +102,9 @@ pub enum IrGenError {
96102

97103
#[error("circular dependency detected: {cycle:?}")]
98104
CircularDependency { cycle: Vec<PathBuf> },
105+
106+
#[error("failed to serialise action: {0}")]
107+
ActionSerialisation(#[from] serde_json::Error),
99108
}
100109

101110
impl BuildGraph {
@@ -109,7 +118,7 @@ impl BuildGraph {
109118
let mut graph = Self::default();
110119
let mut rule_map = HashMap::new();
111120

112-
Self::process_rules(manifest, &mut graph.actions, &mut rule_map);
121+
Self::process_rules(manifest, &mut graph.actions, &mut rule_map)?;
113122
Self::process_targets(manifest, &mut graph.actions, &mut graph.targets, &rule_map)?;
114123
Self::process_defaults(manifest, &mut graph.default_targets);
115124

@@ -122,11 +131,12 @@ impl BuildGraph {
122131
manifest: &NetsukeManifest,
123132
actions: &mut HashMap<String, Action>,
124133
rule_map: &mut HashMap<String, String>,
125-
) {
134+
) -> Result<(), IrGenError> {
126135
for rule in &manifest.rules {
127-
let hash = register_action(actions, rule.recipe.clone(), rule.description.clone());
136+
let hash = register_action(actions, rule.recipe.clone(), rule.description.clone())?;
128137
rule_map.insert(rule.name.clone(), hash);
129138
}
139+
Ok(())
130140
}
131141

132142
fn process_targets(
@@ -141,7 +151,7 @@ impl BuildGraph {
141151
let action_id = match &target.recipe {
142152
Recipe::Rule { rule } => resolve_rule(rule, rule_map, &target_name)?,
143153
Recipe::Command { .. } | Recipe::Script { .. } => {
144-
register_action(actions, target.recipe.clone(), None)
154+
register_action(actions, target.recipe.clone(), None)?
145155
}
146156
};
147157

@@ -183,7 +193,7 @@ fn register_action(
183193
actions: &mut HashMap<String, Action>,
184194
recipe: Recipe,
185195
description: Option<String>,
186-
) -> String {
196+
) -> Result<String, IrGenError> {
187197
let action = Action {
188198
recipe,
189199
description,
@@ -192,9 +202,9 @@ fn register_action(
192202
pool: None,
193203
restat: false,
194204
};
195-
let hash = ActionHasher::hash(&action);
205+
let hash = ActionHasher::hash(&action).map_err(IrGenError::ActionSerialisation)?;
196206
actions.entry(hash.clone()).or_insert(action);
197-
hash
207+
Ok(hash)
198208
}
199209

200210
fn map_string_or_list<T, F>(sol: &StringOrList, f: F) -> Vec<T>

tests/hasher_tests.rs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ use rstest::rstest;
1515
pool: None,
1616
restat: false,
1717
},
18-
"a0f6e2cd3b9b3cee0bf94a7d53bce56cf4178dfe907bb1cb7c832f47846baf38"
18+
"0fe3670f0746dcec34768df158d814ac099e416b6045e7e213d0aabd6aa761cb"
1919
)]
2020
#[case(
2121
Action {
@@ -26,7 +26,7 @@ use rstest::rstest;
2626
pool: None,
2727
restat: true,
2828
},
29-
"cf8e97357820acf6f66037dcf977ee36c88c2811d60342db30c99507d24a0d60"
29+
"9b0289f92ea0e374eecdaf50c8c9080547635aaff38d07fe2a278af6894c3207"
3030
)]
3131
#[case(
3232
Action {
@@ -37,7 +37,7 @@ use rstest::rstest;
3737
pool: None,
3838
restat: false,
3939
},
40-
"69f72afccc2aa5a709af1139a9c7ef5f4f72e57cf5376e6c043e575f68f2ef8d"
40+
"9733343b512253e636fbacfea40ef4f5771d49409fcda026aec7c7ce2f5405ec"
4141
)]
4242
#[case(
4343
Action {
@@ -48,7 +48,7 @@ use rstest::rstest;
4848
pool: None,
4949
restat: false,
5050
},
51-
"c28b5c0b7f20bf1093cbab990976b904268f173413f54b7007166b2c02f498f3"
51+
"9b53c477668394e59eca5b34416ef7ad7fb5799ca96dd283e81d7acda6c56006"
5252
)]
5353
#[case(
5454
Action {
@@ -59,7 +59,7 @@ use rstest::rstest;
5959
pool: None,
6060
restat: false,
6161
},
62-
"28adc0857704aa0c54c3bc624cb2dc70c101c3936987b20ae520a20319f591c2"
62+
"57023b1c00f7daf410d3d2077346e38014d3612c278aadef73a8484c94bdcb77"
6363
)]
6464
// Order of rule names influences the digest.
6565
#[case(
@@ -71,8 +71,9 @@ use rstest::rstest;
7171
pool: None,
7272
restat: true,
7373
},
74-
"b93ff0102089f1f1a3fe9eec082b59d5aab58271a40724ccdfdaade6a68fe340"
74+
"d5f1a262a95b75db3a7a79a5855eb27b6b430833e7ba93538502a16ebd03f50b"
7575
)]
7676
fn hash_action_is_stable(#[case] action: Action, #[case] expected: &str) {
77-
assert_eq!(ActionHasher::hash(&action), expected);
77+
let digest = ActionHasher::hash(&action).expect("hash action");
78+
assert_eq!(digest, expected);
7879
}

tests/snapshots/ninja/ninja_snapshot_tests__touch_manifest_ninja.snap

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
source: tests/ninja_snapshot_tests.rs
33
expression: ninja_content
44
---
5-
rule ca3067639652d0018b982cd2fc8262e3a02f4404f60148b8493de0f656d9b1a2
5+
rule d3cc8be04150cb4e2d9ccbdbe94cf9f2e8ade54bb4701b8faf99cafeb456a75d
66
command = python3 -c 'import os,sys; open(sys.argv[1],"a").close()' $out
77

8-
build out/a: ca3067639652d0018b982cd2fc8262e3a02f4404f60148b8493de0f656d9b1a2 in/a
8+
build out/a: d3cc8be04150cb4e2d9ccbdbe94cf9f2e8ade54bb4701b8faf99cafeb456a75d in/a

0 commit comments

Comments
 (0)