Skip to content

Commit c80ecfc

Browse files
committed
fix: stabilise the monitor agent id
Rather than retrying the request at most 3 times and then simply falling back to an automatically generated UUID for the agentId, we now keep retrying the request indefinitely until we are able to retrieve the Kubernetes Deployment UID. The retry logic backs off exponentially, up to a configurable maximum duration in seconds (MAX_RETRY_BACKOFF_DURATION_SECONDS).
1 parent 6ff5bbe commit c80ecfc

File tree

4 files changed

+44
-5
lines changed

4 files changed

+44
-5
lines changed

config.default.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,6 @@
1515
"REQUEST_QUEUE_LENGTH": 2,
1616
"QUEUE_LENGTH_LOG_FREQUENCY_MINUTES": 15,
1717
"INTEGRATION_ID": "",
18-
"DEFAULT_KUBERNETES_UPSTREAM_URL": "https://kubernetes-upstream.snyk.io"
18+
"DEFAULT_KUBERNETES_UPSTREAM_URL": "https://kubernetes-upstream.snyk.io",
19+
"MAX_RETRY_BACKOFF_DURATION_SECONDS": 300
1920
}

src/supervisor/agent.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { config } from '../common/config';
22
import { logger } from '../common/logger';
33
import { k8sApi } from './cluster';
4-
import { retryKubernetesApiRequest } from './kuberenetes-api-wrappers';
4+
import { retryKubernetesApiRequestIndefinitely } from './kuberenetes-api-wrappers';
55

66
export async function setSnykMonitorAgentId(): Promise<void> {
77
const name = config.DEPLOYMENT_NAME;
@@ -20,8 +20,9 @@ async function getSnykMonitorDeploymentUid(
2020
namespace: string,
2121
): Promise<string | undefined> {
2222
try {
23-
const attemptedApiCall = await retryKubernetesApiRequest(() =>
24-
k8sApi.appsClient.readNamespacedDeployment(name, namespace),
23+
const attemptedApiCall = await retryKubernetesApiRequestIndefinitely(
24+
() => k8sApi.appsClient.readNamespacedDeployment(name, namespace),
25+
config.MAX_RETRY_BACKOFF_DURATION_SECONDS,
2526
);
2627
return attemptedApiCall.body.metadata?.uid;
2728
} catch (error) {

src/supervisor/kuberenetes-api-wrappers.ts

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import * as http from 'http';
22
import * as sleep from 'sleep-promise';
3+
import { logger } from '../common/logger';
34
import { IRequestError } from './types';
45

56
export const ATTEMPTS_MAX = 3;
@@ -26,6 +27,42 @@ export async function retryKubernetesApiRequest<ResponseType>(
2627
throw new Error('Could not receive a response from the Kubernetes API');
2728
}
2829

30+
/**
 * Retries a request to the Kubernetes API indefinitely, sleeping between
 * attempts with an exponentially growing delay (2, 4, 8, ... seconds) that is
 * capped at `maxSleepDuration`. We use this function when starting the
 * Kubernetes Monitor to ensure the agentId is correctly set to the deployment ID.
 *
 * @param func function to retry
 * @param maxSleepDuration maximum sleep duration in seconds (e.g. 300). A value
 * that is not a positive finite number (missing or malformed configuration)
 * falls back to 300 seconds, so the loop can never degrade into retrying
 * without any delay.
 * @returns the value `func` resolves with
 * @throws the error raised by `func` as soon as `shouldRetryRequest` reports
 * that it should not be retried
 */
export async function retryKubernetesApiRequestIndefinitely<ResponseType>(
  func: IKubernetesApiFunction<ResponseType>,
  maxSleepDuration: number,
): Promise<ResponseType> {
  // Mirrors the MAX_RETRY_BACKOFF_DURATION_SECONDS default and the example in
  // the doc comment above.
  const fallbackMaxSleepSeconds = 300;

  // Number() keeps the numeric coercion Math.min() would have applied (e.g. to
  // a numeric string), while letting us reject NaN, Infinity, zero and
  // negative caps: those would make every sleep return almost immediately and
  // turn the loop into a tight retry storm against the API server.
  const requestedCap = Number(maxSleepDuration);
  const sleepCapSeconds =
    Number.isFinite(requestedCap) && requestedCap > 0
      ? requestedCap
      : fallbackMaxSleepSeconds;

  let attempts = 1;

  while (true) {
    try {
      return await func();
    } catch (err) {
      // The attempt number is pinned to 1 rather than passing `attempts`;
      // presumably this keeps any attempt ceiling inside shouldRetryRequest
      // (cf. ATTEMPTS_MAX) from ever ending the loop - confirm against that
      // helper. Only its verdict on the error itself stops the retries.
      if (!shouldRetryRequest(err, 1)) {
        throw err;
      }

      // 2^attempts eventually overflows to Infinity; Math.min() still yields
      // the cap in that case, so the delay stays bounded.
      const backoff = Math.pow(2, attempts);
      const sleepSeconds = Math.min(backoff, sleepCapSeconds);
      logger.error(
        { error: err },
        `connection to kubernetes API failed, retrying in ${sleepSeconds} seconds`,
      );

      await sleep(sleepSeconds * 1000);
      attempts++;
    }
  }
}
65+
2966
export function calculateSleepSeconds(
3067
httpResponse: http.IncomingMessage,
3168
): number {

test/system/kind.spec.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ test('Kubernetes-Monitor with KinD', async (jestDoneCallback) => {
5858

5959
const agentId = randomUUID();
6060
const retryKubernetesApiRequestMock = jest
61-
.spyOn(kubernetesApiWrappers, 'retryKubernetesApiRequest')
61+
.spyOn(kubernetesApiWrappers, 'retryKubernetesApiRequestIndefinitely')
6262
.mockResolvedValueOnce({
6363
body: {
6464
metadata: {

0 commit comments

Comments
 (0)