Skip to content

Commit 9498ee2

Browse files
committed
feat: collect telemetry to understand time spent scanning images
We are looking to improve our process of scanning images. Right now we have no visibility into how long an average image scan takes. For this reason we collect the following timings: time spent in queue, time spent pulling images, time spent scanning images. Additionally, collect the current queue size at the start of the image scan.
1 parent 5eb4ce4 commit 9498ee2

File tree

6 files changed

+68
-8
lines changed

6 files changed

+68
-8
lines changed

src/scanner/index.ts

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,16 @@ import {
1212
constructDepGraph,
1313
constructScanResults,
1414
} from '../transmitter/payload';
15-
import { IWorkload, ILocalWorkloadLocator } from '../transmitter/types';
15+
import {
16+
IWorkload,
17+
ILocalWorkloadLocator,
18+
Telemetry,
19+
} from '../transmitter/types';
1620
import { IPullableImage, IScanImage } from './images/types';
1721

1822
export async function processWorkload(
1923
workloadMetadata: IWorkload[],
24+
telemetry: Partial<Telemetry>,
2025
): Promise<void> {
2126
// every workload metadata references the same workload name, grab it from the first one
2227
const workloadName = workloadMetadata[0].name;
@@ -27,20 +32,24 @@ export async function processWorkload(
2732
'pulling unique images',
2833
);
2934
const imagesWithFileSystemPath = getImagesWithFileSystemPath(uniqueImages);
35+
const imagePullStartTimestampMs = Date.now();
3036
const pulledImages = await pullImages(imagesWithFileSystemPath);
37+
const imagePullDurationMs = Date.now() - imagePullStartTimestampMs;
3138
if (pulledImages.length === 0) {
3239
logger.info(
3340
{ workloadName },
3441
'no images were pulled, halting scanner process.',
3542
);
3643
return;
3744
}
45+
telemetry.imagePullDurationMs = imagePullDurationMs;
3846

3947
try {
4048
await scanImagesAndSendResults(
4149
workloadName,
4250
pulledImages,
4351
workloadMetadata,
52+
telemetry,
4453
);
4554
} finally {
4655
await removePulledImages(pulledImages);
@@ -105,8 +114,11 @@ async function scanImagesAndSendResults(
105114
workloadName: string,
106115
pulledImages: IPullableImage[],
107116
workloadMetadata: IWorkload[],
117+
telemetry: Partial<Telemetry>,
108118
): Promise<void> {
119+
const imageScanStartTimestampMs = Date.now();
109120
const scannedImages = await scanImages(pulledImages);
121+
const imageScanDurationMs = Date.now() - imageScanStartTimestampMs;
110122

111123
if (scannedImages.length === 0) {
112124
logger.info(
@@ -116,6 +128,8 @@ async function scanImagesAndSendResults(
116128
return;
117129
}
118130

131+
telemetry.imageScanDurationMs = imageScanDurationMs;
132+
119133
logger.info(
120134
{ workloadName, imageCount: scannedImages.length },
121135
'successfully scanned images',
@@ -124,6 +138,7 @@ async function scanImagesAndSendResults(
124138
const scanResultsPayloads = constructScanResults(
125139
scannedImages,
126140
workloadMetadata,
141+
telemetry,
127142
);
128143
const success = await sendScanResults(scanResultsPayloads);
129144
if (!success) {

src/supervisor/watchers/handlers/pod.ts

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import { logger } from '../../../common/logger';
44
import { config } from '../../../common/config';
55
import { processWorkload } from '../../../scanner';
66
import { sendWorkloadMetadata } from '../../../transmitter';
7-
import { IWorkload } from '../../../transmitter/types';
7+
import { IWorkload, Telemetry } from '../../../transmitter/types';
88
import { constructWorkloadMetadata } from '../../../transmitter/payload';
99
import { buildMetadataForWorkload } from '../../metadata-extractor';
1010
import { PodPhase } from '../types';
@@ -13,6 +13,13 @@ import { FALSY_WORKLOAD_NAME_MARKER } from './types';
1313
import { WorkloadKind } from '../../types';
1414
import { deleteWorkload } from './workload';
1515

16+
export interface ImagesToScanQueueData {
17+
workloadMetadata: IWorkload[];
18+
imageKeys: string[];
19+
/** The timestamp when this workload was added to the image scan queue. */
20+
enqueueTimestampMs: number;
21+
}
22+
1623
function deleteFailedKeysFromState(keys): void {
1724
try {
1825
for (const key of keys) {
@@ -33,10 +40,19 @@ function deleteFailedKeysFromState(keys): void {
3340
}
3441
}
3542

36-
async function queueWorkerWorkloadScan(task, callback): Promise<void> {
37-
const { workloadMetadata, imageKeys } = task;
43+
async function queueWorkerWorkloadScan(
44+
task: ImagesToScanQueueData,
45+
callback,
46+
): Promise<void> {
47+
const { workloadMetadata, imageKeys, enqueueTimestampMs } = task;
48+
/** Represents how long this workload spent waiting in the queue to be processed. */
49+
const enqueueDurationMs = Date.now() - enqueueTimestampMs;
50+
const telemetry: Partial<Telemetry> = {
51+
enqueueDurationMs,
52+
queueSize: workloadsToScanQueue.length(),
53+
};
3854
try {
39-
await processWorkload(workloadMetadata);
55+
await processWorkload(workloadMetadata, telemetry);
4056
} catch (err) {
4157
logger.error(
4258
{ err, task },
@@ -46,7 +62,7 @@ async function queueWorkerWorkloadScan(task, callback): Promise<void> {
4662
}
4763
}
4864

49-
const workloadsToScanQueue = async.queue(
65+
const workloadsToScanQueue = async.queue<ImagesToScanQueueData>(
5066
queueWorkerWorkloadScan,
5167
config.WORKLOADS_TO_SCAN_QUEUE_WORKER_COUNT,
5268
);
@@ -82,7 +98,11 @@ function handleReadyPod(workloadMetadata: IWorkload[]): void {
8298
}
8399

84100
if (imagesToScan.length > 0) {
85-
workloadsToScanQueue.push({ workloadMetadata: imagesToScan, imageKeys });
101+
workloadsToScanQueue.push({
102+
workloadMetadata: imagesToScan,
103+
imageKeys,
104+
enqueueTimestampMs: Date.now(),
105+
});
86106
}
87107
}
88108

src/transmitter/payload.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import {
1212
ScanResultsPayload,
1313
IDependencyGraphPayload,
1414
IWorkloadEventsPolicyPayload,
15+
Telemetry,
1516
} from './types';
1617
import { state } from '../state';
1718

@@ -51,6 +52,7 @@ export function constructDepGraph(
5152
export function constructScanResults(
5253
scannedImages: IScanResult[],
5354
workloadMetadata: IWorkload[],
55+
telemetry: Partial<Telemetry>,
5456
): ScanResultsPayload[] {
5557
return scannedImages.map<ScanResultsPayload>((scannedImage) => {
5658
// We know that .find() won't return undefined
@@ -72,6 +74,7 @@ export function constructScanResults(
7274
};
7375

7476
return {
77+
telemetry,
7578
imageLocator,
7679
agentId: config.AGENT_ID,
7780
scanResults: scannedImage.scanResults,

src/transmitter/types.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ export interface ScanResultsPayload {
4242
imageLocator: IImageLocator;
4343
agentId: string;
4444
scanResults: ScanResult[];
45+
telemetry: Partial<Telemetry>;
4546
}
4647

4748
export interface IWorkloadMetadataPayload {
@@ -96,3 +97,10 @@ export interface IRequestError {
9697
code: string;
9798
message: string;
9899
}
100+
101+
export interface Telemetry {
102+
enqueueDurationMs: number;
103+
queueSize: number;
104+
imagePullDurationMs: number;
105+
imageScanDurationMs: number;
106+
}

test/system/kind.spec.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,12 @@ test('Kubernetes-Monitor with KinD', async (jestDoneCallback) => {
250250
try {
251251
expect(requestBody).toEqual<transmitterTypes.ScanResultsPayload>({
252252
agentId,
253+
telemetry: {
254+
enqueueDurationMs: expect.any(Number),
255+
imagePullDurationMs: expect.any(Number),
256+
imageScanDurationMs: expect.any(Number),
257+
queueSize: expect.any(Number),
258+
},
253259
imageLocator: expect.objectContaining({
254260
imageId: expect.any(String),
255261
}),

test/unit/transmitter-payload.spec.ts

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,14 @@ describe('transmitter payload tests', () => {
6060
},
6161
];
6262

63+
const telemetry: Record<string, number> = {};
64+
6365
expect(() =>
64-
payload.constructScanResults(scannedImages, workloadMetadata),
66+
payload.constructScanResults(
67+
scannedImages,
68+
workloadMetadata,
69+
telemetry,
70+
),
6571
).toThrow();
6672
},
6773
);
@@ -97,6 +103,7 @@ describe('transmitter payload tests', () => {
97103
podSpec: podSpecFixture,
98104
},
99105
];
106+
const telemetry: Record<string, number> = {};
100107

101108
// These values are populated at runtime (injected by the deployment) so we have to mock them
102109
// to make sure the function uses them to construct the payload (otherwise they are undefined).
@@ -110,6 +117,7 @@ describe('transmitter payload tests', () => {
110117
const payloads = payload.constructScanResults(
111118
scannedImages,
112119
workloadMetadata,
120+
telemetry,
113121
);
114122
expect(payloads).toHaveLength(1);
115123

0 commit comments

Comments
 (0)