Skip to content

Commit 439c248

Browse files
fix(stackablectl): Re-run GVK discovery after resolution failure (#294)
* fix(stackablectl): Re-run GVK discovery after resolution failure * Use tokio::sync::RwLock * Remove comment * Update rust/stackable-cockpit/src/utils/k8s/client.rs Co-authored-by: Nick <[email protected]> * tracing * tracing2 * changelog --------- Co-authored-by: Nick <[email protected]>
1 parent 85c9b52 commit 439c248

File tree

2 files changed

+63
-17
lines changed

2 files changed

+63
-17
lines changed

rust/stackable-cockpit/src/utils/k8s/client.rs

Lines changed: 58 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,14 @@ use k8s_openapi::api::{
77
use kube::{
88
api::{ListParams, Patch, PatchParams, PostParams},
99
core::{DynamicObject, GroupVersionKind, ObjectList, ObjectMeta, TypeMeta},
10-
discovery::Scope,
10+
discovery::{ApiCapabilities, ApiResource, Scope},
1111
Api, Discovery, ResourceExt,
1212
};
1313
use serde::Deserialize;
1414
use snafu::{OptionExt, ResultExt, Snafu};
1515
use stackable_operator::{commons::listener::Listener, kvp::Labels};
16+
use tokio::sync::RwLock;
17+
use tracing::info;
1618

1719
use crate::{
1820
platform::{cluster, credentials::Credentials},
@@ -39,12 +41,15 @@ pub enum Error {
3941
#[snafu(display("failed to deserialize YAML data"))]
4042
DeserializeYaml { source: serde_yaml::Error },
4143

44+
#[snafu(display("failed to run GVK discovery"))]
45+
GVKDiscoveryRun { source: kube::error::Error },
46+
47+
#[snafu(display("GVK {gvk:?} is not known"))]
48+
GVKUnkown { gvk: GroupVersionKind },
49+
4250
#[snafu(display("failed to deploy manifest because type of object {object:?} is not set"))]
4351
ObjectType { object: DynamicObject },
4452

45-
#[snafu(display("failed to deploy manifest because GVK {gvk:?} cannot be resolved"))]
46-
DiscoveryResolve { gvk: GroupVersionKind },
47-
4853
#[snafu(display("failed to convert byte string into UTF-8 string"))]
4954
ByteStringConvert { source: FromUtf8Error },
5055

@@ -66,7 +71,9 @@ pub enum Error {
6671

6772
pub struct Client {
6873
client: kube::Client,
69-
discovery: Discovery,
74+
75+
// Choosing an [`RwLock`] here, as there can be many reads in parallel, but running a discovery is very rare.
76+
discovery: RwLock<Discovery>,
7077
}
7178

7279
impl Client {
@@ -77,10 +84,7 @@ impl Client {
7784
.await
7885
.context(KubeClientCreateSnafu)?;
7986

80-
let discovery = Discovery::new(client.clone())
81-
.run()
82-
.await
83-
.context(KubeClientFetchSnafu)?;
87+
let discovery = RwLock::new(Self::run_discovery(client.clone()).await?);
8488

8589
Ok(Self { client, discovery })
8690
}
@@ -112,9 +116,9 @@ impl Client {
112116

113117
let gvk = Self::gvk_of_typemeta(object_type);
114118
let (resource, capabilities) = self
115-
.discovery
116119
.resolve_gvk(&gvk)
117-
.context(DiscoveryResolveSnafu { gvk })?;
120+
.await?
121+
.context(GVKUnkownSnafu { gvk })?;
118122

119123
let api: Api<DynamicObject> = match capabilities.scope {
120124
Scope::Cluster => {
@@ -147,9 +151,9 @@ impl Client {
147151
gvk: &GroupVersionKind,
148152
namespace: Option<&str>,
149153
) -> Result<Option<ObjectList<DynamicObject>>, Error> {
150-
let object_api_resource = match self.discovery.resolve_gvk(gvk) {
151-
Some((object_api_resource, _)) => object_api_resource,
152-
None => {
154+
let object_api_resource = match self.resolve_gvk(gvk).await {
155+
Ok(Some((object_api_resource, _))) => object_api_resource,
156+
_ => {
153157
return Ok(None);
154158
}
155159
};
@@ -175,9 +179,9 @@ impl Client {
175179
object_name: &str,
176180
gvk: &GroupVersionKind,
177181
) -> Result<Option<DynamicObject>, Error> {
178-
let object_api_resource = match self.discovery.resolve_gvk(gvk) {
179-
Some((object_api_resource, _)) => object_api_resource,
180-
None => {
182+
let object_api_resource = match self.resolve_gvk(gvk).await {
183+
Ok(Some((object_api_resource, _))) => object_api_resource,
184+
_ => {
181185
return Ok(None);
182186
}
183187
};
@@ -383,6 +387,43 @@ impl Client {
383387
endpoints_api.get(name).await.context(KubeClientFetchSnafu)
384388
}
385389

390+
/// Try to resolve the given [`GroupVersionKind`]. In case the resolution fails, a discovery is run to pull in new
391+
/// GVKs that are not present in the [`Discovery`] cache. Afterwards a normal resolution is issued.
392+
async fn resolve_gvk(
393+
&self,
394+
gvk: &GroupVersionKind,
395+
) -> Result<Option<(ApiResource, ApiCapabilities)>> {
396+
let resolved = self.discovery.read().await.resolve_gvk(gvk);
397+
398+
Ok(match resolved {
399+
Some(resolved) => Some(resolved),
400+
None => {
401+
info!(?gvk, "discovery did not include gvk");
402+
403+
// We take the lock early here to avoid running multiple discoveries in parallel (as they are expensive)
404+
let mut old_discovery = self.discovery.write().await;
405+
406+
// We create a new Discovery object here, as [`Discovery::run`] consumes self
407+
let new_discovery = Self::run_discovery(self.client.clone()).await?;
408+
*old_discovery = new_discovery;
409+
410+
// Release the lock as quickly as possible
411+
drop(old_discovery);
412+
self.discovery.read().await.resolve_gvk(gvk)
413+
}
414+
})
415+
}
416+
417+
/// Creates a new [`Discovery`] object and immediately runs a discovery.
418+
#[tracing::instrument(skip_all)]
419+
async fn run_discovery(client: kube::client::Client) -> Result<Discovery> {
420+
info!("running discovery");
421+
Discovery::new(client)
422+
.run()
423+
.await
424+
.context(GVKDiscoveryRunSnafu)
425+
}
426+
386427
/// Extracts the [`GroupVersionKind`] from [`TypeMeta`].
387428
fn gvk_of_typemeta(type_meta: &TypeMeta) -> GroupVersionKind {
388429
match type_meta.api_version.split_once('/') {

rust/stackablectl/CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,16 @@ All notable changes to this project will be documented in this file.
44

55
## [Unreleased]
66

7+
### Fixed
8+
9+
- Re-run GVK discovery after resolution failure ([#294]).
10+
711
## [24.3.3] - 2024-05-13
812

913
- Bump Rust, Go and Node dependencies ([#238]).
1014

1115
[#238]: https://github.com/stackabletech/stackable-cockpit/pull/238
16+
[#294]: https://github.com/stackabletech/stackable-cockpit/pull/294
1217

1318
## [24.3.2] - 2024-04-25
1419

0 commit comments

Comments
 (0)