Skip to content

Commit 9989b59

Browse files
committed
scraper: insert fake OCI digests to unstick nodes
A bug introduced in coreos-assembler [1] (since reverted [2]) ended up creating disk images that had different OCI digests than the released OCI image. This results in deployed nodes being unable to update [3], because Zincati looks for the deployed OCI checksum in the graph to determine the update path [4]. This commit introduces a workaround: one day a week, we replace the image pullspecs in the OCI graph with the invalid digests to give Zincati a chance to trigger an update. This will get the nodes back to a valid state. Note that we won't patch the last release in the graph, so Zincati still has a valid checksum to fetch from the registry. [1] coreos/coreos-assembler@9190a34 [2] coreos/coreos-assembler#4374 [3] coreos/fedora-coreos-tracker#2066 [4] https://github.com/coreos/zincati/blob/238a79a9c2d11a39d7b7f9c6e71888b75d2c6ab3/src/cincinnati/mod.rs#L230-L245
1 parent 4a06ec8 commit 9989b59

File tree

5 files changed

+270
-0
lines changed

5 files changed

+270
-0
lines changed

dist/fedora-infra/Dockerfile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,5 +18,8 @@ RUN cargo build --release && \
1818
# build: cleanup
1919
RUN cargo clean
2020

21+
# temporary: fix the invalid bootimages digests
22+
RUN cp /src/dist/fedora-infra/bootimages_digests.json /data.json
23+
2124
# run: default config
2225
WORKDIR /
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
{
2+
"43.20251024.3.0": {
3+
"x86_64": {
4+
"good": "sha256:44528ecc3fe8ab2c2a4d0990cdc0898aca334aa632d4a77f23118f8900435636",
5+
"bad": "sha256:ca99893c80a7b84dd84d4143bd27538207c2f38ab6647a58d9c8caa251f9a087"
6+
},
7+
"aarch64": {
8+
"good": "sha256:bba9eff19e3da927c09644eefd42303c4dc7401844cee8e849115466f13b08e9",
9+
"bad": "sha256:bb356df5b2a9356c0ec966a35abd0cd8b199c8ddc18b9c70e81baa4c2401796c"
10+
},
11+
"ppc64le": {
12+
"good": "sha256:7119567796344ac0f55ed18ada84f504b008cf45d2165a276d9388efe3b2ed68",
13+
"bad": "sha256:79df7ec5156068fc7066561440f1b6bd468307b5f190e3426ad1180039603511"
14+
},
15+
"s390x": {
16+
"good": "sha256:6fe9f2d0a4e611e98f5c1d340f86e9a9839fe32f0a210ed5a2502308870871de",
17+
"bad": "sha256:c02814c3a49dffa5150609209e2bcc5ed3e708b2be3ba732b66bda619c36f0ac"
18+
}
19+
},
20+
"43.20251024.2.0": {
21+
"x86_64": {
22+
"good": "sha256:206d8bd241ed75329d4ab35f73d244c757112a846bd158972287c9bf22c37b19",
23+
"bad": "sha256:8aeabcbfbdf432c657f55cf6f247adf188606db5e0034a922f75d90229c4b9ba"
24+
},
25+
"aarch64": {
26+
"good": "sha256:eff5b8be876cca882a9862f44235c42f53215dbb5290391e7d606015f89ca0ca",
27+
"bad": "sha256:cf77c783f8fad63ce2c546ac274576b67c0502dd2546160b3bc286260ebaa30c"
28+
},
29+
"ppc64le": {
30+
"good": "sha256:8848ade6a061d0ca393abf9d257c74de7c8c01ca2c7f52dd6d0e99affe4094fb",
31+
"bad": "sha256:1c5fcba935cdc8e76a79ec6d9904cdd66be4a84721e5cbed8e756e9c6f1c3e86"
32+
},
33+
"s390x": {
34+
"good": "sha256:ad2bfeae0a677346de25ed89bb7a15e578b23e62c5b19f09ab96e76020e849c9",
35+
"bad": "sha256:b4e96ede2fb324dbb5509700ed516bbaf6604242bf3b85bc87f10372f2235e2d"
36+
}
37+
},
38+
"43.20251110.2.0": {
39+
"x86_64": {
40+
"good": "sha256:beaf03ab8a2996277ca639cd107212e79b21fb48d28a46da7279129ea8627814",
41+
"bad": "sha256:d124db75754b48750f28d9c81ec27b01ad81ff834032ac43e287b45ec98cc905"
42+
},
43+
"aarch64": {
44+
"good": "sha256:854745b12ddea3dd607b5073083e8f6371017fbdd905d564b0b750892b2d7e0f",
45+
"bad": "sha256:369dd94edc36dfb461eb4a05d0b05df10d74208eff7107ce58d7244205023280"
46+
},
47+
"ppc64le": {
48+
"good": "sha256:698326f0b7bcb7db4fc00afedba026924a45c4360a2c0d4c9647c2f5d0d289d3",
49+
"bad": "sha256:54b1aeeb240954f9ac3456ae334f96fcb6f5a8256de33d166ca4d418d02373cd"
50+
},
51+
"s390x": {
52+
"good": "sha256:b49ce46cd6b96984f25268aba87be345357303e429ff669bb48db644d466cf8a",
53+
"bad": "sha256:ce31221559e9eb304147cb8d2530103538c0536113c8214d6e431cf1d67b953a"
54+
}
55+
},
56+
"43.20251024.1.0": {
57+
"x86_64": {
58+
"good": "sha256:92f750c7b4e69fe9ee05173fcd64b688042007edcd93854c9e1a7143aaaefbac",
59+
"bad": "sha256:d1bb889a3b6b2f18b04640359d32bc2e3d662499b66c44b7332e0c4746f37342"
60+
},
61+
"aarch64": {
62+
"good": "sha256:39937b019590f5dc0f22de502abe7dda3faf491f7a6c12c25ada3c8ba33c1f43",
63+
"bad": "sha256:d54697a5760ece8bebbed51ff283b9a710ac9665ece4bdbb7115270ef5483ac6"
64+
},
65+
"ppc64le": {
66+
"good": "sha256:79c1eafa01d58df521806084859a76de4856aa0fc4403b6ffd1434988a2c45f3",
67+
"bad": "sha256:a9dc410bded576283b8d99a9565979715cd3fb4269d8cd023566e63753bf8b25"
68+
},
69+
"s390x": {
70+
"good": "sha256:b6030886a39c9b2c2168fa1f84cf65e9bd6ed25eff146292716df7879de79b5f",
71+
"bad": "sha256:dc1097ff0df3671ba0079c059f78de2a2fe90a4bd9cd4a83ed7af92d6804665d"
72+
}
73+
},
74+
"43.20251027.1.0": {
75+
"x86_64": {
76+
"good": "sha256:a8ccd268e4ffd2241f4a29e7683f97cfacaf1c84b48314b41b325038d7775f33",
77+
"bad": "sha256:b7d3fa44789c700a6544c1ede17d1e66b32edd476cfe6e4798b24d7e7acde04b"
78+
},
79+
"aarch64": {
80+
"good": "sha256:ee141e4bf376c3523557142a2ab52f249fed784fcc4d625908c2de98af88573c",
81+
"bad": "sha256:d01f69f0f231e4b6335bf116335d7ffc8428d490608122ca78da43e025c61d0b"
82+
},
83+
"ppc64le": {
84+
"good": "sha256:87e0b9a5e9f92a857bc29c5eb9e9723cdc0298e6d342a0b6cced9797836c08d8",
85+
"bad": "sha256:2a49d71fe9517d635935ed8e09055ed39a53d67405c578657020a0ae783f7533"
86+
},
87+
"s390x": {
88+
"good": "sha256:b83a45f1d837b9ed2318c313754fdfefa28af3583d64edca953362cec674010b",
89+
"bad": "sha256:402787f3cb3b76cfc5b34cec6a3e5a73baa99ffacbef294569bba90d7cb48f01"
90+
}
91+
},
92+
"43.20251110.1.0": {
93+
"x86_64": {
94+
"good": "sha256:441e88ad05bf4eca61e5802248aa8e79089fe060686fe6fe90b0ea8f90b7e839",
95+
"bad": "sha256:e5e865a5c1643ffecbccf8b7abe24bc57db4b0469acc697bb6ca6abe4c5ea14c"
96+
},
97+
"aarch64": {
98+
"good": "sha256:a95f637ff7fcf4f06d24060a8a7fe582cc9b9133eacb666f7d848f389d3a7fe6",
99+
"bad": "sha256:9cac4529affce183c4b8af61050391127a816f1766264c02b78dbad97ff67514"
100+
},
101+
"ppc64le": {
102+
"good": "sha256:17766ebf6e96e860fb7c87e9cda038d9cc02aa447f483ba543c3f48e5e84f314",
103+
"bad": "sha256:10779dabd8a9b094d358a9769fc7606c07cd50a459ec27f514a83fbcb0a53539"
104+
},
105+
"s390x": {
106+
"good": "sha256:2011cb2d8baa33064a769d4da596accfb179a690d971ffe16924ea832d35b5ab",
107+
"bad": "sha256:ee3e72931587bcc0ff36fa82f5e2beb4d5695caac49ff9c3f4b7562224f94cb1"
108+
}
109+
}
110+
}

fcos-graph-builder/src/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ mod cli;
77
mod config;
88
mod scraper;
99
mod settings;
10+
mod workaround_issue_2066;
1011

1112
use actix::prelude::*;
1213
use actix_web::{web, App, HttpResponse};

fcos-graph-builder/src/scraper.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ use std::collections::HashMap;
77
use std::num::NonZeroU64;
88
use std::time::Duration;
99

10+
use crate::workaround_issue_2066::DigestsMapper;
11+
1012
/// Default timeout for HTTP requests (30 minutes).
1113
const DEFAULT_HTTP_REQ_TIMEOUT: Duration = Duration::from_secs(30 * 60);
1214

@@ -22,6 +24,9 @@ pub struct Scraper {
2224
pause_secs: NonZeroU64,
2325
release_index_url: reqwest::Url,
2426
updates_url: reqwest::Url,
27+
// hotfix to serve out fake graphs to unstick nodes
28+
// see https://github.com/coreos/fedora-coreos-tracker/issues/2066
29+
bad_digests_mapper: DigestsMapper,
2530
}
2631

2732
impl Scraper {
@@ -50,6 +55,7 @@ impl Scraper {
5055
.timeout(DEFAULT_HTTP_REQ_TIMEOUT)
5156
.build()?;
5257

58+
let bad_digests_mapper = DigestsMapper::new_from_file()?;
5359
let scraper = Self {
5460
graphs,
5561
oci_graphs,
@@ -58,6 +64,7 @@ impl Scraper {
5864
stream,
5965
release_index_url: reqwest::Url::parse(&releases_json)?,
6066
updates_url: reqwest::Url::parse(&updates_json)?,
67+
bad_digests_mapper,
6168
};
6269
Ok(scraper)
6370
}
@@ -112,9 +119,25 @@ impl Scraper {
112119
let stream = self.stream.clone();
113120
let arches: Vec<String> = self.graphs.keys().cloned().collect();
114121

122+
// cloning the bad digests_map object isn't great but it avoids dealing with the
123+
// lifetime issues of moving `self` into the async block.
124+
// The graph is cached after it's built, so it's reasonable to eat the cloning cost
125+
let bad_digests_mapper = self.bad_digests_mapper.clone();
126+
115127
async move {
116128
let (graph, updates) =
117129
futures::future::try_join(stream_releases, stream_updates).await?;
130+
131+
// patch some digests to unstick nodes that booted with the wrong OCI digests
132+
// See https://github.com/coreos/fedora-coreos-tracker/issues/2066
133+
let graph = if DigestsMapper::should_patch() {
134+
let mut graph = graph;
135+
bad_digests_mapper.fix_releases(&mut graph);
136+
graph
137+
} else {
138+
graph
139+
};
140+
118141
// first the legacy graphs
119142
let mut map = HashMap::with_capacity(arches.len());
120143
for arch in &arches {
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
// some boot images were shipped with a deployed container hash
2+
// that does not match what was released. This leads Zincati to not
3+
// find the booted deployment in the graph, so it cannot update out of it.
4+
// To unstick these nodes we serve an incorrect graph one day of the week
5+
// to allow these nodes to update.
6+
7+
use chrono::prelude::*;
8+
use chrono::Weekday;
9+
use commons::metadata::Release;
10+
use failure::Error;
11+
use serde::{Deserialize, Serialize};
12+
use std::collections::HashMap;
13+
use std::fs::File;
14+
use std::io::BufReader;
15+
use std::option::Option;
16+
17+
// The patched graph is only served on this weekday (Monday); the check is
// done by `DigestsMapper::should_patch`.
18+
static DAY_OF_THE_WEEK: Weekday = Weekday::Mon;
19+
20+
// Remote copy of the good/bad digests map. Only referenced by
// `DigestsMapper::new_from_url`, which is itself marked `allow(dead_code)`.
#[allow(dead_code)]
21+
static BAD_HASHES_SOURCE_URL: &str = "https://github.com/user-attachments/files/23700782/data.json";
22+
// Local path of the good/bad digests map, read by `DigestsMapper::new_from_file`.
static BAD_HASHES_SOURCE_PATH: &str = "/data.json";
23+
24+
// These are all plain strings, so define some aliases to make the map types
// easier to reason about.
25+
type Version = String;
26+
type Arch = String;
27+
type Digest = String;
28+
29+
// Each entry in the good / bad hashes map looks like this:
30+
// "43.20251024.3.0": {
31+
// "x86_64": {
32+
// "good": "sha256:44528ecc3fe8ab2c2a4d0990cdc0898aca334aa632d4a77f23118f8900435636",
33+
// "bad": "sha256:ca99893c80a7b84dd84d4143bd27538207c2f38ab6647a58d9c8caa251f9a087"
34+
// },
35+
// "aarch64": {
36+
// "good": "sha256:bba9eff19e3da927c09644eefd42303c4dc7401844cee8e849115466f13b08e9",
37+
// "bad": "sha256:bb356df5b2a9356c0ec966a35abd0cd8b199c8ddc18b9c70e81baa4c2401796c"
38+
// },
39+
// ..... // the other arches
40+
// }
41+
42+
/// Pair of SHA-256 digests recorded for one release on one architecture.
///
/// Mirrors one `{"good": ..., "bad": ...}` object of the digests JSON file.
43+
#[derive(Clone, Debug, Deserialize, Serialize)]
44+
pub struct GoodBadDigests {
45+
/// Presumably the digest of the OCI image as actually released (never read
/// in this module) — confirm against the data source.
pub good: Digest,
46+
/// Digest from the affected boot image; `DigestsMapper::fix_releases`
/// substitutes it into the served pullspec.
pub bad: Digest,
47+
}
48+
49+
/// Under each version, there is one good/bad digests entry per architecture.
50+
pub type VersionEntry = HashMap<Arch, GoodBadDigests>;
51+
52+
/// Top-level digests map, keyed by release version.
53+
// Derive macros cannot be applied to a type alias, so the map is wrapped in
54+
// a struct and serde's `flatten` attribute is used on the wrapped field.
55+
#[derive(Clone, Debug, Deserialize, Serialize)]
56+
pub struct DigestsMapper {
57+
// Flattened so the version strings are the top-level keys of the JSON document.
#[serde(flatten)]
58+
version_digests_map: HashMap<Version, VersionEntry>,
59+
}
60+
61+
impl DigestsMapper {
62+
#[allow(dead_code)]
63+
pub async fn new_from_url(client: reqwest::Client) -> Result<DigestsMapper, Error> {
64+
let req = client.get(BAD_HASHES_SOURCE_URL);
65+
66+
let resp = req.send().await?;
67+
let content = resp.error_for_status()?;
68+
let json = content.json::<DigestsMapper>().await?;
69+
Ok(json)
70+
}
71+
72+
pub fn new_from_file() -> Result<DigestsMapper, Error> {
73+
let file = File::open(BAD_HASHES_SOURCE_PATH)?;
74+
let reader = BufReader::new(file);
75+
76+
let digests = serde_json::from_reader(reader)?;
77+
Ok(digests)
78+
}
79+
80+
// we only inject wrong values one day of the week.
81+
pub fn should_patch() -> bool {
82+
let today: Weekday = Utc::now().weekday();
83+
84+
today == DAY_OF_THE_WEEK
85+
}
86+
87+
fn get_bad_hash_for_version_and_arch(&self, version: &Version, arch: &Arch) -> Option<String> {
88+
self.version_digests_map
89+
.get(version)
90+
.and_then(|version_entry| version_entry.get(arch).map(|digests| digests.bad.clone()))
91+
}
92+
93+
pub fn fix_releases(&self, releases: &mut Vec<Release>) {
94+
// We don't want to touch the last entry, it needs to be a valid target for update.
95+
let last_release = releases.pop();
96+
97+
for entry in releases.iter_mut() {
98+
if let Some(releases_oci) = entry.oci_images.as_mut() {
99+
// The unwrap is safe here as we checked for is_some() above
100+
for oci_release in releases_oci.iter_mut() {
101+
let bad_hash = self.get_bad_hash_for_version_and_arch(
102+
&entry.version,
103+
&oci_release.architecture,
104+
);
105+
106+
if let Some(bad_hash) = bad_hash {
107+
debug!(
108+
"found bad hash for {} - {}",
109+
&entry.version, &oci_release.architecture
110+
);
111+
debug!("Original ReleaseOciImage:\n {oci_release:?}");
112+
// digest_ref is a digested pullspec: $oci_image_name@$digest so we need to split it
113+
// and change only the digest part.
114+
let (img_name, _) = oci_release
115+
.digest_ref
116+
.split_once('@')
117+
// The unwrap is safe here, we are always dealing with a digested pullspec
118+
.unwrap();
119+
120+
oci_release.digest_ref = format!("{img_name}@{bad_hash}");
121+
info!(
122+
"Patched release {} with a bad digest from the bootimage.",
123+
&entry.version
124+
);
125+
debug!("Patched ReleaseOciImage:\n {oci_release:?}");
126+
}
127+
}
128+
}
129+
}
130+
131+
releases.push(last_release.unwrap().clone());
132+
}
133+
}

0 commit comments

Comments
 (0)