Skip to content

Commit 6d96315

Browse files
FooBarWidget, metajack, and mitchhentges
authored
Use different hashing functions for different file types (#1135)
* Use different hashing functions for different file types

This adds a new special hasher for static libraries, which contain timestamps and other info that prevent them from being cacheable in some cases.

* Update src/util.rs

Co-authored-by: Mitchell Hentges <[email protected]>
Co-authored-by: Jack Moffitt <[email protected]>
Co-authored-by: Mitchell Hentges <[email protected]>
1 parent d22f5f2 commit 6d96315

File tree

3 files changed

+81
-8
lines changed

3 files changed

+81
-8
lines changed

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ required-features = ["dist-server"]
2323

2424
[dependencies]
2525
anyhow = "1.0"
26-
ar = { version = "0.9", optional = true }
26+
ar = "0.9"
2727
async-trait = "0.1"
2828
atty = "0.2.6"
2929
base64 = "0.13"
@@ -129,7 +129,7 @@ native-zlib = []
129129
# Enable features that require unstable features of Nightly Rust.
130130
unstable = []
131131
# Enables distributed support in the sccache client
132-
dist-client = ["ar", "flate2", "hyper", "hyperx", "reqwest", "url", "sha2"]
132+
dist-client = ["flate2", "hyper", "hyperx", "reqwest", "url", "sha2"]
133133
# Enables the sccache-dist binary
134134
dist-server = ["crossbeam-utils", "jsonwebtoken", "flate2", "hyperx", "libmount", "nix", "openssl", "reqwest", "rouille", "syslog", "void", "version-compare"]
135135
# Enables dist tests with external requirements

src/compiler/rust.rs

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ use crate::dist::pkg;
2525
#[cfg(feature = "dist-client")]
2626
use crate::lru_disk_cache::{LruCache, Meter};
2727
use crate::mock_command::{CommandCreatorSync, RunCommand};
28-
use crate::util::{fmt_duration_as_secs, hash_all, run_input_output, Digest};
28+
use crate::util::{fmt_duration_as_secs, hash_all, hash_all_archives, run_input_output, Digest};
2929
use crate::util::{ref_env, HashToDigest, OsStrExt};
3030
use filetime::FileTime;
3131
use log::Level::Trace;
@@ -1337,7 +1337,7 @@ where
13371337
// Hash the contents of the staticlibs listed on the commandline.
13381338
trace!("[{}]: hashing {} staticlibs", crate_name, staticlibs.len());
13391339
let abs_staticlibs = staticlibs.iter().map(|s| cwd.join(s)).collect::<Vec<_>>();
1340-
let staticlib_hashes = hash_all(&abs_staticlibs, pool);
1340+
let staticlib_hashes = hash_all_archives(&abs_staticlibs, pool);
13411341

13421342
let ((source_files, source_hashes, mut env_deps), extern_hashes, staticlib_hashes) = futures::try_join!(
13431343
source_files_and_hashes_and_env_deps,
@@ -2343,7 +2343,7 @@ mod test {
23432343
use itertools::Itertools;
23442344
use std::ffi::OsStr;
23452345
use std::fs::File;
2346-
use std::io::Write;
2346+
use std::io::{self, Write};
23472347
use std::sync::{Arc, Mutex};
23482348

23492349
fn _parse_arguments(arguments: &[String]) -> CompilerArguments<ParsedArguments> {
@@ -2953,13 +2953,28 @@ c:/foo/bar.rs:
29532953

29542954
#[test]
29552955
fn test_generate_hash_key() {
2956+
use ar::{Builder, Header};
29562957
drop(env_logger::try_init());
29572958
let f = TestFixture::new();
29582959
const FAKE_DIGEST: &str = "abcd1234";
2960+
const BAZ_O_SIZE: u64 = 1024;
29592961
// We'll just use empty files for each of these.
2960-
for s in ["foo.rs", "bar.rs", "bar.rlib", "libbaz.a"].iter() {
2962+
for s in ["foo.rs", "bar.rs", "bar.rlib"].iter() {
29612963
f.touch(s).unwrap();
29622964
}
2965+
// libbaz.a needs to be a valid archive.
2966+
create_file(f.tempdir.path(), "libbaz.a", |f| {
2967+
let mut builder = Builder::new(f);
2968+
let hdr = Header::new(b"baz.o".to_vec(), BAZ_O_SIZE);
2969+
builder.append(&hdr, io::repeat(0).take(BAZ_O_SIZE))?;
2970+
Ok(())
2971+
})
2972+
.unwrap();
2973+
let mut m = Digest::new();
2974+
m.update(b"baz.o");
2975+
m.update(&vec![0; BAZ_O_SIZE as usize]);
2976+
let libbaz_a_digest = m.finish();
2977+
29632978
let mut emit = HashSet::new();
29642979
emit.insert("link".to_string());
29652980
emit.insert("metadata".to_string());
@@ -3038,8 +3053,9 @@ c:/foo/bar.rs:
30383053
m.update(empty_digest.as_bytes());
30393054
// bar.rlib (extern crate, from externs)
30403055
m.update(empty_digest.as_bytes());
3041-
// libbaz.a (static library, from staticlibs)
3042-
m.update(empty_digest.as_bytes());
3056+
// libbaz.a (static library, from staticlibs), containing a single
3057+
// file, baz.o, consisting of 1024 bytes of zeroes.
3058+
m.update(libbaz_a_digest.as_bytes());
30433059
// Env vars
30443060
OsStr::new("CARGO_BLAH").hash(&mut HashToDigest { digest: &mut m });
30453061
m.update(b"=");

src/util.rs

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
// limitations under the License.
1414

1515
use crate::mock_command::{CommandChild, RunCommand};
16+
use ar::Archive;
1617
use blake3::Hasher as blake3_Hasher;
1718
use byteorder::{BigEndian, ByteOrder};
1819
use serde::Serialize;
@@ -22,6 +23,7 @@ use std::hash::Hasher;
2223
use std::io::prelude::*;
2324
use std::path::{Path, PathBuf};
2425
use std::process::{self, Stdio};
26+
use std::str;
2527
use std::time;
2628
use std::time::Duration;
2729

@@ -121,6 +123,61 @@ pub async fn hash_all(files: &[PathBuf], pool: &tokio::runtime::Handle) -> Resul
121123
Ok(hashes)
122124
}
123125

126+
/// Calculate the digest of each static library archive in `files` on background threads in
127+
/// `pool`.
128+
///
129+
/// The hash is calculated by adding the filename of each archive entry followed
130+
/// by its contents, ignoring headers and other file metadata. This primarily
131+
/// exists because Apple's `ar` tool inserts timestamps for each file with
132+
/// no way to disable this behavior.
133+
pub async fn hash_all_archives(
134+
files: &[PathBuf],
135+
pool: &tokio::runtime::Handle,
136+
) -> Result<Vec<String>> {
137+
let start = time::Instant::now();
138+
let count = files.len();
139+
let iter = files.iter().map(|path| {
140+
let path = path.clone();
141+
pool.spawn_blocking(move || -> Result<String> {
142+
let mut m = Digest::new();
143+
let reader = File::open(&path)
144+
.with_context(|| format!("Failed to open file for hashing: {:?}", path))?;
145+
let mut archive = Archive::new(reader);
146+
while let Some(entry) = archive.next_entry() {
147+
let entry = entry?;
148+
m.update(entry.header().identifier());
149+
update_from_reader(&mut m, entry)?;
150+
}
151+
Ok(m.finish())
152+
})
153+
});
154+
155+
let mut hashes = futures::future::try_join_all(iter).await?;
156+
if let Some(i) = hashes.iter().position(|res| res.is_err()) {
157+
return Err(hashes.swap_remove(i).unwrap_err());
158+
}
159+
160+
trace!(
161+
"Hashed {} files in {}",
162+
count,
163+
fmt_duration_as_secs(&start.elapsed())
164+
);
165+
Ok(hashes.into_iter().map(|res| res.unwrap()).collect())
166+
}
167+
168+
/// Update the digest `m` with all data from `reader`.
169+
fn update_from_reader<R: Read>(m: &mut Digest, mut reader: R) -> Result<()> {
170+
loop {
171+
let mut buffer = [0; 1024];
172+
let count = reader.read(&mut buffer[..])?;
173+
if count == 0 {
174+
break;
175+
}
176+
m.update(&buffer[..count]);
177+
}
178+
Ok(())
179+
}
180+
124181
/// Format `duration` as seconds with a fractional component.
125182
pub fn fmt_duration_as_secs(duration: &Duration) -> String {
126183
format!("{}.{:03} s", duration.as_secs(), duration.subsec_millis())

0 commit comments

Comments
 (0)