@@ -2,13 +2,17 @@ import {
22 CreateFleetCommand ,
33 CreateFleetResult ,
44 CreateTagsCommand ,
5+ DefaultTargetCapacityType ,
56 DescribeInstancesCommand ,
67 DescribeInstancesResult ,
78 EC2Client ,
89 FleetLaunchTemplateOverridesRequest ,
910 Tag ,
1011 TerminateInstancesCommand ,
12+ StopInstancesCommand ,
13+ StartInstancesCommand ,
1114 _InstanceType ,
15+ Filter ,
1216} from '@aws-sdk/client-ec2' ;
1317import { createChildLogger } from '@aws-github-runner/aws-powertools-util' ;
1418import { getTracedAWSV3Client , tracer } from '@aws-github-runner/aws-powertools-util' ;
@@ -167,15 +171,12 @@ async function processFleetResult(
167171 ) ;
168172 const errors = fleet . Errors ?. flatMap ( ( e ) => e . ErrorCode || '' ) || [ ] ;
169173
170- // Educated guess of errors that would make sense to retry based on the list
171- // https://docs.aws.amazon.com/AWSEC2/latest/APIReference/errors-overview.html
172174 const scaleErrors = [
173175 'UnfulfillableCapacity' ,
174176 'MaxSpotInstanceCountExceeded' ,
175177 'TargetCapacityLimitExceededException' ,
176178 'RequestLimitExceeded' ,
177179 'ResourceLimitExceeded' ,
178- 'MaxSpotInstanceCountExceeded' ,
179180 'MaxSpotFleetRequestCountExceeded' ,
180181 'InsufficientInstanceCapacity' ,
181182 ] ;
@@ -184,7 +185,7 @@ async function processFleetResult(
184185 errors . some ( ( e ) => runnerParameters . onDemandFailoverOnError ?. includes ( e ) ) &&
185186 runnerParameters . ec2instanceCriteria . targetCapacityType === 'spot'
186187 ) {
187- logger . warn ( `Create fleet failed, initatiing fall back to on demand instances.` ) ;
188+ logger . warn ( `Create fleet failed, initiating fall back to on demand instances.` ) ;
188189 logger . debug ( 'Create fleet failed.' , { data : fleet . Errors } ) ;
189190 const numberOfInstances = runnerParameters . numberOfRunners - instances . length ;
190191 const instancesOnDemand = await createRunner ( {
@@ -218,12 +219,10 @@ async function getAmiIdOverride(runnerParameters: Runners.RunnerInputParameters)
218219 return amiIdOverride ;
219220 } catch ( e ) {
220221 logger . debug (
221- `Failed to lookup runner AMI ID from SSM parameter: ${ runnerParameters . amiIdSsmParameterName } . ` +
222- 'Please ensure that the given parameter exists on this region and contains a valid runner AMI ID' ,
222+ `Failed to lookup runner AMI ID from SSM parameter: ${ runnerParameters . amiIdSsmParameterName } .` ,
223223 { error : e } ,
224224 ) ;
225- throw new Error ( `Failed to lookup runner AMI ID from SSM parameter: ${ runnerParameters . amiIdSsmParameterName } ,
226- ${ e } ` ) ;
225+ throw new Error ( `Failed to lookup runner AMI ID from SSM parameter: ${ runnerParameters . amiIdSsmParameterName } , ${ e } ` ) ;
227226 }
228227}
229228
@@ -244,54 +243,80 @@ async function createInstances(
244243 tags . push ( { Key : 'ghr:trace_id' , Value : traceId ! } ) ;
245244 }
246245
247- let fleet : CreateFleetResult ;
248- try {
249- // see for spec https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_CreateFleet.html
250- const createFleetCommand = new CreateFleetCommand ( {
251- LaunchTemplateConfigs : [
252- {
253- LaunchTemplateSpecification : {
254- LaunchTemplateName : runnerParameters . launchTemplateName ,
255- Version : '$Default' ,
256- } ,
257- Overrides : generateFleetOverrides (
258- runnerParameters . subnets ,
259- runnerParameters . ec2instanceCriteria . instanceTypes ,
260- amiIdOverride ,
261- ) ,
246+ const createFleetCommand = new CreateFleetCommand ( {
247+ LaunchTemplateConfigs : [
248+ {
249+ LaunchTemplateSpecification : {
250+ LaunchTemplateName : runnerParameters . launchTemplateName ,
251+ Version : '$Default' ,
262252 } ,
263- ] ,
264- SpotOptions : {
265- MaxTotalPrice : runnerParameters . ec2instanceCriteria . maxSpotPrice ,
266- AllocationStrategy : runnerParameters . ec2instanceCriteria . instanceAllocationStrategy ,
253+ Overrides : generateFleetOverrides (
254+ runnerParameters . subnets ,
255+ runnerParameters . ec2instanceCriteria . instanceTypes ,
256+ amiIdOverride ,
257+ ) ,
267258 } ,
268- TargetCapacitySpecification : {
269- TotalTargetCapacity : runnerParameters . numberOfRunners ,
270- DefaultTargetCapacityType : runnerParameters . ec2instanceCriteria . targetCapacityType ,
259+ ] ,
260+ SpotOptions : {
261+ MaxTotalPrice : runnerParameters . ec2instanceCriteria . maxSpotPrice ,
262+ AllocationStrategy : runnerParameters . ec2instanceCriteria . instanceAllocationStrategy ,
263+ } ,
264+ TargetCapacitySpecification : {
265+ TotalTargetCapacity : runnerParameters . numberOfRunners ,
266+ DefaultTargetCapacityType : runnerParameters . ec2instanceCriteria . targetCapacityType ,
267+ } ,
268+ TagSpecifications : [
269+ {
270+ ResourceType : 'instance' ,
271+ Tags : tags ,
271272 } ,
272- TagSpecifications : [
273- {
274- ResourceType : 'instance' ,
275- Tags : tags ,
276- } ,
277- {
278- ResourceType : 'volume' ,
279- Tags : tags ,
280- } ,
281- ] ,
282- Type : 'instant' ,
283- } ) ;
284- fleet = await ec2Client . send ( createFleetCommand ) ;
285- } catch ( e ) {
286- logger . warn ( 'Create fleet request failed.' , { error : e as Error } ) ;
287- throw e ;
288- }
289- return fleet ;
273+ {
274+ ResourceType : 'volume' ,
275+ Tags : tags ,
276+ } ,
277+ ] ,
278+ Type : 'instant' ,
279+ } ) ;
280+ return await ec2Client . send ( createFleetCommand ) ;
290281}
291282
292- // If launchTime is undefined, this will return false
/**
 * Reports whether a runner's boot window has already elapsed.
 *
 * The window length is read from the `RUNNER_BOOT_TIME_IN_MINUTES` environment
 * variable and added to the runner's launch time; the result is compared with
 * the current time.
 *
 * NOTE(review): when `launchTime` is undefined, `moment(undefined)` presumably
 * resolves to "now", which would make this return false for a positive boot
 * time — confirm against the moment documentation.
 *
 * @param ec2Runner - Object carrying the (optional) launch time of the instance.
 * @returns true when launch time plus the boot window lies before the current time.
 */
export function bootTimeExceeded(ec2Runner: { launchTime?: Date }): boolean {
  const bootWindowMinutes = process.env.RUNNER_BOOT_TIME_IN_MINUTES;
  const bootDeadline = moment(ec2Runner.launchTime).utc().add(bootWindowMinutes, 'minutes');
  const now = moment(new Date()).utc();
  return bootDeadline < now;
}
288+
/**
 * Hibernates a runner instance and marks it so it can be found again later.
 *
 * Sends a StopInstances request with `Hibernate: true` and then sets the
 * `ghr:hibernated=true` tag on the instance. `resumeHibernatedInstances`
 * selects instances by that tag, so an instance that was stopped but not
 * tagged would never be resumed; a tagging failure is therefore logged with
 * that context before it is rethrown.
 *
 * NOTE(review): per the EC2 documentation the stop request only hibernates
 * instances that are hibernation-enabled and meet the prerequisites — confirm
 * the launch template configures this.
 *
 * @param instanceId - EC2 instance id of the runner to hibernate.
 * @throws Rethrows any error raised by the stop request or by tagging.
 */
export async function hibernateRunner(instanceId: string): Promise<void> {
  logger.debug(`Runner '${instanceId}' will be hibernated (stopped with hibernation).`);
  const ec2 = getTracedAWSV3Client(new EC2Client({ region: process.env.AWS_REGION }));
  await ec2.send(new StopInstancesCommand({ InstanceIds: [instanceId], Hibernate: true }));

  try {
    await tag(instanceId, [{ Key: 'ghr:hibernated', Value: 'true' }]);
  } catch (e) {
    // The stop request already went through: without the tag this instance is
    // invisible to the resume lookup, so make the situation diagnosable.
    logger.error(`Runner '${instanceId}' was stopped for hibernation but could not be tagged 'ghr:hibernated'.`, {
      error: e as Error,
    });
    throw e;
  }

  logger.debug(`Runner '${instanceId}' has been hibernated.`);
}
298+
/**
 * Starts up to `count` stopped runner instances that carry the
 * `ghr:hibernated=true` tag.
 *
 * Candidates are looked up with DescribeInstances, filtered on instance state
 * `stopped`, `tag:ghr:hibernated=true` and
 * `tag:ghr:Application=github-action-runner`. Result pages are followed via
 * `NextToken` until enough candidates are collected or no pages remain. After
 * the start request each resumed instance has its `ghr:hibernated` tag set to
 * `'false'` (the tag is overwritten, not deleted) so it no longer matches the
 * lookup filter.
 *
 * @param count - Maximum number of instances to resume. Values that are not
 *   greater than zero (including negative numbers and NaN) resume nothing and
 *   make no AWS calls.
 * @param extraFilters - Optional additional DescribeInstances filters, e.g. to
 *   restrict the lookup to a single environment. Defaults to none.
 * @returns The ids of the instances for which a start was requested.
 * @throws Rethrows any error from the describe, start or tagging calls.
 */
export async function resumeHibernatedInstances(
  count: number,
  extraFilters: readonly Filter[] = [],
): Promise<string[]> {
  // A negative end index would make slice() drop items from the tail instead of
  // limiting the result, so reject every non-positive (or NaN) request up front.
  const limit = Math.floor(count);
  if (!(limit > 0)) {
    return [];
  }

  const ec2 = getTracedAWSV3Client(new EC2Client({ region: process.env.AWS_REGION }));
  const filters: Filter[] = [
    { Name: 'instance-state-name', Values: ['stopped'] },
    { Name: 'tag:ghr:hibernated', Values: ['true'] },
    { Name: 'tag:ghr:Application', Values: ['github-action-runner'] },
    ...extraFilters,
  ];

  const candidates: string[] = [];
  let nextToken: string | undefined;
  do {
    // Explicit annotation avoids a circular type inference between the page
    // and the token that is fed back into the next request.
    const page: DescribeInstancesResult = await ec2.send(
      new DescribeInstancesCommand({ Filters: filters, NextToken: nextToken }),
    );
    for (const reservation of page.Reservations ?? []) {
      for (const instance of reservation.Instances ?? []) {
        if (instance.InstanceId) {
          candidates.push(instance.InstanceId);
        }
      }
    }
    nextToken = page.NextToken;
  } while (nextToken && candidates.length < limit);

  const toResume = candidates.slice(0, limit);
  if (toResume.length === 0) {
    return toResume;
  }

  logger.info(`Resuming hibernated instances: ${toResume.join(',')}`);
  await ec2.send(new StartInstancesCommand({ InstanceIds: toResume }));

  // Flip the marker so these instances are not selected again by the filter above.
  for (const instanceId of toResume) {
    await tag(instanceId, [{ Key: 'ghr:hibernated', Value: 'false' }]);
  }

  return toResume;
}
0 commit comments