Skip to content

Commit 1c29e6f

Browse files
authored
Merge pull request #990 from snyk/fix/retry-informer
fix: retry on network errors in K8s API informers
2 parents 74714da + 780cf1a commit 1c29e6f

File tree

4 files changed: 23 additions (+23) and 20 deletions (-20).

src/supervisor/kuberenetes-api-wrappers.ts

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -3,18 +3,13 @@ import sleep from 'sleep-promise';
33

44
import { logger } from '../common/logger';
55
import { IRequestError } from './types';
6+
import { RETRYABLE_NETWORK_ERRORS } from './watchers/types';
67

78
export const ATTEMPTS_MAX = 3;
89
export const DEFAULT_SLEEP_SEC = 1;
910
export const MAX_SLEEP_SEC = 5;
1011
type IKubernetesApiFunction<ResponseType> = () => Promise<ResponseType>;
1112

12-
const RETRYABLE_NETWORK_ERRORS: string[] = [
13-
'ECONNREFUSED',
14-
'ETIMEDOUT',
15-
'ECONNRESET',
16-
];
17-
1813
export async function retryKubernetesApiRequest<ResponseType>(
1914
func: IKubernetesApiFunction<ResponseType>,
2015
): Promise<ResponseType> {

src/supervisor/watchers/handlers/index.ts

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ import {
3030
import { k8sApi, kubeConfig } from '../../cluster';
3131
import * as kubernetesApiWrappers from '../../kuberenetes-api-wrappers';
3232
import { IWorkloadWatchMetadata, FALSY_WORKLOAD_NAME_MARKER } from './types';
33-
import { ECONNRESET_ERROR_CODE } from '../types';
33+
import { RETRYABLE_NETWORK_ERRORS } from '../types';
3434

3535
/**
3636
* This map is used in combination with the kubernetes-client Informer API
@@ -173,10 +173,11 @@ export async function setupInformer(
173173
namespace: string,
174174
workloadKind: WorkloadKind,
175175
): Promise<void> {
176+
const logContext: Record<string, unknown> = { namespace, workloadKind };
176177
const isSupported = await isSupportedWorkload(namespace, workloadKind);
177178
if (!isSupported) {
178179
logger.info(
179-
{ namespace, workloadKind },
180+
logContext,
180181
'The Kubernetes cluster does not support this workload',
181182
);
182183
return;
@@ -196,7 +197,7 @@ export async function setupInformer(
196197
);
197198
} catch (err) {
198199
logger.error(
199-
{ err, namespace, workloadKind },
200+
{ ...logContext, err },
200201
'error while listing entities on namespace',
201202
);
202203
throw err;
@@ -211,18 +212,22 @@ export async function setupInformer(
211212

212213
informer.on(ERROR, (err) => {
213214
// Types from client library insists that callback is of type KubernetesObject
214-
if ((err as any).code === ECONNRESET_ERROR_CODE) {
215+
const code = (err as any).code || '';
216+
if (RETRYABLE_NETWORK_ERRORS.includes(code)) {
215217
logger.debug(
216-
{},
217-
`informer ${ECONNRESET_ERROR_CODE} occurred, restarting informer`,
218+
logContext,
219+
`informer ${code} occurred, restarting informer`,
218220
);
219221

220222
// Restart informer after 1sec
221223
setTimeout(async () => {
222224
await informer.start();
223225
}, 1000);
224226
} else {
225-
logger.error({ err }, 'unexpected informer error event occurred');
227+
logger.error(
228+
{ ...logContext, err },
229+
'unexpected informer error event occurred',
230+
);
226231
}
227232
});
228233

@@ -235,7 +240,7 @@ export async function setupInformer(
235240
(watchedWorkload.metadata && watchedWorkload.metadata.name) ||
236241
FALSY_WORKLOAD_NAME_MARKER;
237242
logger.warn(
238-
{ error, namespace, name, workloadKind },
243+
{ ...logContext, error, name },
239244
'could not execute the informer handler for a workload',
240245
);
241246
}

src/supervisor/watchers/index.ts

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@ import { V1Namespace } from '@kubernetes/client-node';
44
import { logger } from '../../common/logger';
55
import { config } from '../../common/config';
66
import { WorkloadKind } from '../types';
7-
import { ECONNRESET_ERROR_CODE } from './types';
87
import { setupInformer } from './handlers';
98
import { kubeConfig, k8sApi } from '../cluster';
109
import * as kubernetesApiWrappers from '../kuberenetes-api-wrappers';
@@ -13,6 +12,7 @@ import {
1312
openshiftInternalNamespaces,
1413
} from './internal-namespaces';
1514
import { state } from '../../state';
15+
import { RETRYABLE_NETWORK_ERRORS } from './types';
1616

1717
async function setupWatchesForNamespace(namespace: V1Namespace): Promise<void> {
1818
const namespaceName = extractNamespaceName(namespace);
@@ -73,10 +73,9 @@ async function setupWatchesForCluster(): Promise<void> {
7373

7474
informer.on(ERROR, (err) => {
7575
// Types from client library insists that callback is of type V1Namespace
76-
if ((err as any).code === ECONNRESET_ERROR_CODE) {
77-
logger.debug(
78-
`namespace informer ${ECONNRESET_ERROR_CODE} occurred, restarting informer`,
79-
);
76+
const code = (err as any).code || '';
77+
if (RETRYABLE_NETWORK_ERRORS.includes(code)) {
78+
logger.debug(`namespace informer ${code} occurred, restarting informer`);
8079

8180
// Restart informer after 1sec
8281
setTimeout(async () => {

src/supervisor/watchers/types.ts

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,4 +11,8 @@ export enum PodPhase {
1111
Unknown = 'Unknown',
1212
}
1313

14-
export const ECONNRESET_ERROR_CODE = 'ECONNRESET';
14+
export const RETRYABLE_NETWORK_ERRORS: readonly string[] = [
15+
'ECONNREFUSED',
16+
'ETIMEDOUT',
17+
'ECONNRESET',
18+
];

0 commit comments

Comments
 (0)