Skip to content

Commit 704b572

Browse files
npalmiainlane
authored andcommitted
handle ScaleError
1 parent 68720ab commit 704b572

File tree

4 files changed

+25
-11
lines changed

4 files changed

+25
-11
lines changed

lambdas/functions/control-plane/src/aws/runners.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,6 @@ async function processFleetResult(
205205
instances.push(...instancesOnDemand);
206206
return instances;
207207
} else if ((failedCount = countScaleErrors(errors, scaleErrors)) > 0) {
208-
209208
logger.warn('Create fleet failed, ScaleError will be thrown to trigger retry for ephemeral runners.');
210209
logger.debug('Create fleet failed.', { data: fleet.Errors });
211210
throw new ScaleError('Failed to create instance, create fleet failed.', failedCount);
@@ -218,7 +217,7 @@ async function processFleetResult(
218217
}
219218

220219
function countScaleErrors(errors: string[], scaleErrors: string[]): number {
221-
return errors.reduce((acc, e) => scaleErrors.includes(e) ? acc + 1 : acc, 0);
220+
return errors.reduce((acc, e) => (scaleErrors.includes(e) ? acc + 1 : acc), 0);
222221
}
223222

224223
async function getAmiIdOverride(runnerParameters: Runners.RunnerInputParameters): Promise<string | undefined> {

lambdas/functions/control-plane/src/lambda.test.ts

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,11 @@ const sqsRecord: SQSRecord = {
2828
},
2929
awsRegion: '',
3030
body: JSON.stringify(body),
31-
eventSource: 'aws:SQS',
31+
eventSource: 'aws:sqs',
3232
eventSourceARN: '',
3333
md5OfBody: '',
3434
messageAttributes: {},
35-
messageId: '',
35+
messageId: 'abcd1234',
3636
receiptHandle: '',
3737
};
3838

@@ -109,14 +109,16 @@ describe('Test scale up lambda wrapper.', () => {
109109
await expect(scaleUpHandler(sqsEvent, context)).resolves.not.toThrow();
110110
});
111111

112-
it('Scale should be rejected', async () => {
112+
it('Scale should create a batch failure message', async () => {
113113
const error = new ScaleError('Scale should be rejected');
114114
const mock = vi.fn() as MockedFunction<typeof scaleUp>;
115115
mock.mockImplementation(() => {
116116
return Promise.reject(error);
117117
});
118118
vi.mocked(scaleUp).mockImplementation(mock);
119-
await expect(scaleUpHandler(sqsEvent, context)).rejects.toThrow(error);
119+
await expect(scaleUpHandler(sqsEvent, context)).resolves.toEqual({
120+
batchItemFailures: [{ itemIdentifier: sqsRecord.messageId }],
121+
});
120122
});
121123

122124
describe('Batch processing', () => {
@@ -240,12 +242,14 @@ describe('Test scale up lambda wrapper.', () => {
240242
const records = createMultipleRecords(2);
241243
const multiRecordEvent: SQSEvent = { Records: records };
242244

243-
const error = new ScaleError('Critical scaling error');
245+
const error = new ScaleError('Critical scaling error', 2);
244246
const mock = vi.fn(scaleUp);
245247
mock.mockImplementation(() => Promise.reject(error));
246248
vi.mocked(scaleUp).mockImplementation(mock);
247249

248-
await expect(scaleUpHandler(multiRecordEvent, context)).rejects.toThrow(error);
250+
await expect(scaleUpHandler(multiRecordEvent, context)).resolves.toEqual({
251+
batchItemFailures: [{ itemIdentifier: 'message-0' }, { itemIdentifier: 'message-1' }],
252+
});
249253
});
250254
});
251255
});

lambdas/functions/control-plane/src/lambda.ts

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,21 @@ export async function scaleUpHandler(event: SQSEvent, context: Context): Promise
6262
return { batchItemFailures };
6363
} catch (e) {
6464
if (e instanceof ScaleError) {
65-
throw e;
65+
for (let i = 0; i < e.failedInstanceCount; i++) {
66+
batchItemFailures.push({
67+
itemIdentifier: sqsMessages[i].messageId,
68+
});
69+
}
70+
logger.warn(
71+
`ScaleError detected, ${e.failedInstanceCount} could not be created. A retry will be attempted via SQS.`,
72+
{ error: e },
73+
);
74+
} else {
75+
logger.error(`Error processing batch (size: ${sqsMessages.length}): ${(e as Error).message}, ignoring batch`, {
76+
error: e,
77+
});
6678
}
6779

68-
logger.warn(`Will retry error: ${e}`);
6980
return { batchItemFailures };
7081
}
7182
}

lambdas/functions/control-plane/src/scale-runners/ScaleError.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
class ScaleError extends Error {
22
constructor(
33
public message: string,
4-
public failedInstanceCount: number = 1
4+
public failedInstanceCount: number = 1,
55
) {
66
super(message);
77
this.name = 'ScaleError';

0 commit comments

Comments
 (0)