Skip to content

Commit f8a35de

Browse files
authored
Never cache errors on disk
Update Cache::store and Cache::load to skip CacheStatus::Error. As a result, failures are always rechecked on subsequent runs. We completely sidestep issues with caching transient network errors or missing HTTP status codes. Fixes #1734. Fixes #1783.
1 parent 48663cb commit f8a35de

File tree

2 files changed

+96
-12
lines changed

2 files changed

+96
-12
lines changed

lychee-bin/src/cache.rs

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ impl StoreExt for Cache {
4646
.has_headers(false)
4747
.from_path(path)?;
4848
for result in self {
49+
// Do not serialize errors to disk. We always want to recheck failing links.
50+
if matches!(result.value().status, CacheStatus::Error(_)) {
51+
continue;
52+
}
4953
wtr.serialize((result.key(), result.value()))?;
5054
}
5155
Ok(())
@@ -60,7 +64,7 @@ impl StoreExt for Cache {
6064
.has_headers(false)
6165
.from_path(path)?;
6266

63-
let map = DashMap::new();
67+
let map = Cache::new();
6468
let current_ts = timestamp();
6569
for result in rdr.deserialize() {
6670
let (uri, value): (Uri, CacheValue) = result?;
@@ -70,6 +74,14 @@ impl StoreExt for Cache {
7074
continue;
7175
}
7276

77+
// Discard errors. Caching errors goes against typical CI workflows.
78+
// If a link fails due to a network issue, a server outage, or if a previously
79+
// failing link has been fixed, reading an error from the cache prevents lychee
80+
// from realizing the link is now working.
81+
if matches!(value.status, CacheStatus::Error(_)) {
82+
continue;
83+
}
84+
7385
// Discard entries for status codes which have been excluded.
7486
// Without this check, an entry might be cached, then its status code is configured as
7587
// excluded, and in subsequent runs the cached value is still reused.
@@ -85,7 +97,6 @@ impl StoreExt for Cache {
8597

8698
#[cfg(test)]
8799
mod tests {
88-
use dashmap::DashMap;
89100
use http::StatusCode;
90101
use lychee_lib::{CacheStatus, StatusCodeSelector, StatusRange, Uri};
91102

@@ -98,7 +109,7 @@ mod tests {
98109
fn test_excluded_status_not_reused_from_cache() {
99110
let uri: Uri = "https://example.com".try_into().unwrap();
100111

101-
let cache: Cache = DashMap::<Uri, CacheValue>::new();
112+
let cache = Cache::new();
102113
cache.insert(
103114
uri.clone(),
104115
CacheValue {
@@ -116,4 +127,34 @@ mod tests {
116127
let cache = Cache::load(tmp.path(), u64::MAX, &excluder).unwrap();
117128
assert!(cache.get(&uri).is_none());
118129
}
130+
131+
#[test]
132+
fn test_errors_not_stored_in_cache() {
133+
let uri: Uri = "https://example.com/error".try_into().unwrap();
134+
135+
let cache = Cache::new();
136+
cache.insert(
137+
uri.clone(),
138+
CacheValue {
139+
status: CacheStatus::Error(Some(StatusCode::INTERNAL_SERVER_ERROR)),
140+
timestamp: timestamp(),
141+
},
142+
);
143+
let uri_none: Uri = "https://example.com/none".try_into().unwrap();
144+
cache.insert(
145+
uri_none.clone(),
146+
CacheValue {
147+
status: CacheStatus::Error(None),
148+
timestamp: timestamp(),
149+
},
150+
);
151+
152+
let tmp = tempfile::NamedTempFile::new().unwrap();
153+
cache.store(tmp.path()).unwrap();
154+
155+
let excluder = StatusCodeSelector::empty();
156+
let loaded_cache = Cache::load(tmp.path(), u64::MAX, &excluder).unwrap();
157+
assert!(loaded_cache.get(&uri).is_none());
158+
assert!(loaded_cache.get(&uri_none).is_none());
159+
}
119160
}

lychee-bin/tests/cli.rs

Lines changed: 52 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1316,8 +1316,8 @@ The config file should contain every possible key for documentation purposes."
13161316
"Missing OK entry in cache"
13171317
);
13181318
assert!(
1319-
data.contains(&format!("{}/,404", mock_server_err.uri())),
1320-
"Missing error entry in cache"
1319+
!data.contains(&format!("{}/,404", mock_server_err.uri())),
1320+
"Error entry should not be cached"
13211321
);
13221322

13231323
// Run again to verify cache behavior
@@ -1327,7 +1327,7 @@ The config file should contain every possible key for documentation purposes."
13271327
mock_server_ok.uri()
13281328
)))
13291329
.stderr(contains(format!(
1330-
"[404] {}/ (at 2:1) | Error (cached)\n",
1330+
"[404] {}/ (at 2:1) | Rejected status code: 404 Not Found (configurable with \"accept\" option)\n",
13311331
mock_server_err.uri()
13321332
)));
13331333

@@ -1454,8 +1454,10 @@ The config file should contain every possible key for documentation purposes."
14541454
// check content of cache file
14551455
let data = fs::read_to_string(&cache_file)?;
14561456
assert!(data.contains(&format!("{}/,200", mock_server_ok.uri())));
1457-
assert!(data.contains(&format!("{}/,418", mock_server_teapot.uri())));
1458-
assert!(data.contains(&format!("{}/,500", mock_server_server_error.uri())));
1457+
// Because the first run DID NOT use `--accept`, 418 and 500 were both treated as errors.
1458+
// With our new error dropping logic, NEITHER gets cached in the first run.
1459+
assert!(!data.contains(&format!("{}/,418", mock_server_teapot.uri())));
1460+
assert!(!data.contains(&format!("{}/,500", mock_server_server_error.uri())));
14591461

14601462
// run again to verify cache behavior
14611463
// this time accept 418 and 500 as valid status codes
@@ -1466,11 +1468,11 @@ The config file should contain every possible key for documentation purposes."
14661468
.assert()
14671469
.success()
14681470
.stderr(contains(format!(
1469-
"[418] {}/ (at 2:1) | OK (cached)",
1471+
"[418] {}/ (at 2:1) | 418 I'm a teapot: I'm a teapot",
14701472
mock_server_teapot.uri()
14711473
)))
14721474
.stderr(contains(format!(
1473-
"[500] {}/ (at 3:1) | OK (cached)",
1475+
"[500] {}/ (at 3:1) | 500 Internal Server Error: Internal Server Error",
14741476
mock_server_server_error.uri()
14751477
)));
14761478

@@ -1601,8 +1603,11 @@ The config file should contain every possible key for documentation purposes."
16011603
// If the status code was 999, the cache file should be empty
16021604
// because we do not want to cache unknown status codes
16031605
let buf = fs::read(&cache_file).unwrap();
1604-
let data = String::from_utf8(buf)?;
1605-
assert!(data.contains(",999,"));
1606+
assert!(
1607+
buf.is_empty(),
1608+
"cache file should be empty, but was {}",
1609+
String::from_utf8_lossy(&buf)
1610+
);
16061611

16071612
Ok(())
16081613
}
@@ -3753,4 +3758,42 @@ https://lychee.cli.rs/guides/cli/#fragments-ignored
37533758
.assert()
37543759
.success();
37553760
}
3761+
3762+
/// Verifies that loading an older, legacy `.lycheecache` file containing a cached error
3763+
/// correctly drops the error and successfully retries the link.
3764+
/// This ensures we don't break existing user CI workflows that have older cache files
3765+
/// stored before we stopped caching errors.
3766+
#[tokio::test]
3767+
async fn test_legacy_cache_file_ignores_errors() -> Result<()> {
3768+
let ts = std::time::SystemTime::now()
3769+
.duration_since(std::time::UNIX_EPOCH)
3770+
.unwrap()
3771+
.as_secs();
3772+
let dir = tempfile::tempdir()?;
3773+
let base_path = dir.path();
3774+
let cache_file = base_path.join(LYCHEE_CACHE_FILE);
3775+
3776+
// A server that is currently returning OK
3777+
let mock_server_ok = mock_server!(StatusCode::OK);
3778+
3779+
// Simulate an older `.lycheecache` where this exact URL previously failed with 404
3780+
// We use a future timestamp ({ts}) so it doesn't expire
3781+
fs::write(&cache_file, format!("{},404,{ts}\n", mock_server_ok.uri()))?;
3782+
3783+
let mut file = File::create(dir.path().join("input.txt"))?;
3784+
writeln!(file, "{}", mock_server_ok.uri())?;
3785+
3786+
// Run lychee. It should ignore the 404 from the cache, actually check the mock server (which returns 200), and succeed.
3787+
cargo_bin_cmd!()
3788+
.current_dir(base_path)
3789+
.arg("input.txt")
3790+
.arg("--cache")
3791+
.arg("--verbose")
3792+
.assert()
3793+
.success()
3794+
.stdout(contains("1 OK"))
3795+
.stdout(contains("0 Errors"));
3796+
3797+
Ok(())
3798+
}
37563799
}

0 commit comments

Comments
 (0)