Skip to content

Commit 47a769f

Browse files
author
Arthur Granado
committed
fix: retry on additional errors for Kubernetes API calls
Some requests to the Kubernetes API server can fail due to network errors that we don't handle, which can cause the container to crash. We think these errors may be temporary, so we have added a retry mechanism that repeats the request a few times. If the error is not resolved after a few retries, we allow the container to crash as it did previously.
1 parent 966f0ad commit 47a769f

File tree

3 files changed

+30
-2
lines changed

3 files changed

+30
-2
lines changed

src/supervisor/watchers/handlers/index.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import { replicaSetWatchHandler } from './replica-set';
1010
import { replicationControllerWatchHandler } from './replication-controller';
1111
import { statefulSetWatchHandler } from './stateful-set';
1212
import { k8sApi, kubeConfig } from '../../cluster';
13+
import * as kubernetesApiWrappers from '../../kuberenetes-api-wrappers';
1314
import { IWorkloadWatchMetadata, FALSY_WORKLOAD_NAME_MARKER } from './types';
1415

1516
/**
@@ -98,7 +99,8 @@ export function setupInformer(namespace: string, workloadKind: WorkloadKind) {
9899
const listMethod = workloadMetadata.listFactory(namespace);
99100
const loggedListMethod = async () => {
100101
try {
101-
return await listMethod();
102+
return await kubernetesApiWrappers.retryKubernetesApiRequest(
103+
() => listMethod());
102104
} catch (err) {
103105
logger.error({err, namespace, workloadKind}, 'error while listing entities on namespace');
104106
throw err;

src/supervisor/watchers/index.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import logger = require('../../common/logger');
66
import { WorkloadKind } from '../types';
77
import { setupInformer } from './handlers';
88
import { kubeConfig, k8sApi } from '../cluster';
9+
import * as kubernetesApiWrappers from '../kuberenetes-api-wrappers';
910
import { kubernetesInternalNamespaces } from './internal-namespaces';
1011

1112
/**
@@ -52,7 +53,8 @@ function setupWatchesForCluster(): void {
5253
'/api/v1/namespaces',
5354
async () => {
5455
try {
55-
return await k8sApi.coreClient.listNamespace();
56+
return await kubernetesApiWrappers.retryKubernetesApiRequest(
57+
() => k8sApi.coreClient.listNamespace());
5658
} catch (err) {
5759
logger.error({err}, 'error while listing namespaces');
5860
throw err;

test/system/kind.test.ts

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,30 @@ tap.test('Kubernetes-Monitor with KinD', async (t) => {
6969
]);
7070

7171
// Setup nocks
72+
nock(/https\:\/\/127\.0\.0\.1\:\d+/, { allowUnmocked: true})
73+
.get('/api/v1/namespaces')
74+
.times(1)
75+
.replyWithError({
76+
code: 'ECONNREFUSED'
77+
})
78+
.get('/api/v1/namespaces')
79+
.times(1)
80+
.replyWithError({
81+
code: 'ETIMEDOUT'
82+
});
83+
84+
nock(/https\:\/\/127\.0\.0\.1\:\d+/, { allowUnmocked: true})
85+
.get('/apis/apps/v1/namespaces/snyk-monitor/deployments')
86+
.times(1)
87+
.replyWithError({
88+
code: 'ECONNREFUSED'
89+
})
90+
.get('/apis/apps/v1/namespaces/snyk-monitor/deployments')
91+
.times(1)
92+
.replyWithError({
93+
code: 'ETIMEDOUT'
94+
});
95+
7296
nock('https://kubernetes-upstream.snyk.io')
7397
.post('/api/v1/workload')
7498
.times(1)

0 commit comments

Comments
 (0)