@@ -10,6 +10,7 @@ import { createRunner, listEC2Runners } from './../aws/runners';
1010import { RunnerInputParameters } from './../aws/runners.d' ;
1111import * as scaleUpModule from './scale-up' ;
1212import { getParameter } from '@aws-github-runner/aws-ssm-util' ;
13+ import { publishRetryMessage } from './job-retry' ;
1314import { describe , it , expect , beforeEach , vi } from 'vitest' ;
1415import type { Octokit } from '@octokit/rest' ;
1516
@@ -33,6 +34,7 @@ const mockCreateRunner = vi.mocked(createRunner);
3334const mockListRunners = vi . mocked ( listEC2Runners ) ;
3435const mockSSMClient = mockClient ( SSMClient ) ;
3536const mockSSMgetParameter = vi . mocked ( getParameter ) ;
37+ const mockPublishRetryMessage = vi . mocked ( publishRetryMessage ) ;
3638
3739vi . mock ( '@octokit/rest' , ( ) => ( {
3840 Octokit : vi . fn ( ) . mockImplementation ( function ( ) {
@@ -63,6 +65,11 @@ vi.mock('@aws-github-runner/aws-ssm-util', async () => {
6365 } ;
6466} ) ;
6567
68+ vi . mock ( './job-retry' , ( ) => ( {
69+ publishRetryMessage : vi . fn ( ) ,
70+ checkAndRetryJob : vi . fn ( ) ,
71+ } ) ) ;
72+
6673export type RunnerType = 'ephemeral' | 'non-ephemeral' ;
6774
6875// for ephemeral and non-ephemeral runners
@@ -1667,6 +1674,159 @@ describe('scaleUp with Github Data Residency', () => {
16671674 } ) ;
16681675} ) ;
16691676
1677+ describe ( 'Retry mechanism tests' , ( ) => {
1678+ beforeEach ( ( ) => {
1679+ process . env . ENABLE_ORGANIZATION_RUNNERS = 'true' ;
1680+ process . env . ENABLE_EPHEMERAL_RUNNERS = 'true' ;
1681+ process . env . ENABLE_JOB_QUEUED_CHECK = 'true' ;
1682+ process . env . RUNNERS_MAXIMUM_COUNT = '10' ;
1683+ expectedRunnerParams = { ...EXPECTED_RUNNER_PARAMS } ;
1684+ mockSSMClient . reset ( ) ;
1685+ } ) ;
1686+
1687+ const createTestMessages = (
1688+ count : number ,
1689+ overrides : Partial < scaleUpModule . ActionRequestMessageSQS > [ ] = [ ] ,
1690+ ) : scaleUpModule . ActionRequestMessageSQS [ ] => {
1691+ return Array . from ( { length : count } , ( _ , i ) => ( {
1692+ ...TEST_DATA_SINGLE ,
1693+ id : i + 1 ,
1694+ messageId : `message-${ i + 1 } ` ,
1695+ ...overrides [ i ] ,
1696+ } ) ) ;
1697+ } ;
1698+
1699+ it ( 'calls publishRetryMessage for each valid message when job is queued' , async ( ) => {
1700+ const messages = createTestMessages ( 3 ) ;
1701+
1702+ await scaleUpModule . scaleUp ( messages ) ;
1703+
1704+ expect ( mockPublishRetryMessage ) . toHaveBeenCalledTimes ( 3 ) ;
1705+ expect ( mockPublishRetryMessage ) . toHaveBeenNthCalledWith (
1706+ 1 ,
1707+ expect . objectContaining ( {
1708+ id : 1 ,
1709+ messageId : 'message-1' ,
1710+ } ) ,
1711+ ) ;
1712+ expect ( mockPublishRetryMessage ) . toHaveBeenNthCalledWith (
1713+ 2 ,
1714+ expect . objectContaining ( {
1715+ id : 2 ,
1716+ messageId : 'message-2' ,
1717+ } ) ,
1718+ ) ;
1719+ expect ( mockPublishRetryMessage ) . toHaveBeenNthCalledWith (
1720+ 3 ,
1721+ expect . objectContaining ( {
1722+ id : 3 ,
1723+ messageId : 'message-3' ,
1724+ } ) ,
1725+ ) ;
1726+ } ) ;
1727+
1728+ it ( 'does not call publishRetryMessage when job is not queued' , async ( ) => {
1729+ mockOctokit . actions . getJobForWorkflowRun . mockImplementation ( ( params ) => {
1730+ const isQueued = params . job_id === 1 ; // Only job 1 is queued
1731+ return {
1732+ data : {
1733+ status : isQueued ? 'queued' : 'completed' ,
1734+ } ,
1735+ } ;
1736+ } ) ;
1737+
1738+ const messages = createTestMessages ( 3 ) ;
1739+
1740+ await scaleUpModule . scaleUp ( messages ) ;
1741+
1742+ // Only message with id 1 should trigger retry
1743+ expect ( mockPublishRetryMessage ) . toHaveBeenCalledTimes ( 1 ) ;
1744+ expect ( mockPublishRetryMessage ) . toHaveBeenCalledWith (
1745+ expect . objectContaining ( {
1746+ id : 1 ,
1747+ messageId : 'message-1' ,
1748+ } ) ,
1749+ ) ;
1750+ } ) ;
1751+
1752+ it ( 'calls publishRetryMessage even when maximum runners is reached' , async ( ) => {
1753+ process . env . RUNNERS_MAXIMUM_COUNT = '0' ; // No runners can be created
1754+
1755+ const messages = createTestMessages ( 2 ) ;
1756+
1757+ await scaleUpModule . scaleUp ( messages ) ;
1758+
1759+ // publishRetryMessage should still be called even though no runners will be created
1760+ expect ( mockPublishRetryMessage ) . toHaveBeenCalledTimes ( 2 ) ;
1761+ expect ( createRunner ) . not . toHaveBeenCalled ( ) ;
1762+ } ) ;
1763+
1764+ it ( 'calls publishRetryMessage with correct message structure including retry counter' , async ( ) => {
1765+ const message = {
1766+ ...TEST_DATA_SINGLE ,
1767+ messageId : 'test-message-id' ,
1768+ retryCounter : 2 ,
1769+ } ;
1770+
1771+ await scaleUpModule . scaleUp ( [ message ] ) ;
1772+
1773+ expect ( mockPublishRetryMessage ) . toHaveBeenCalledWith (
1774+ expect . objectContaining ( {
1775+ id : message . id ,
1776+ messageId : 'test-message-id' ,
1777+ retryCounter : 2 ,
1778+ } ) ,
1779+ ) ;
1780+ } ) ;
1781+
1782+ it ( 'calls publishRetryMessage when ENABLE_JOB_QUEUED_CHECK is false' , async ( ) => {
1783+ process . env . ENABLE_JOB_QUEUED_CHECK = 'false' ;
1784+
1785+ const messages = createTestMessages ( 2 ) ;
1786+
1787+ await scaleUpModule . scaleUp ( messages ) ;
1788+
1789+ // Should always call publishRetryMessage when queue check is disabled
1790+ expect ( mockPublishRetryMessage ) . toHaveBeenCalledTimes ( 2 ) ;
1791+ expect ( mockOctokit . actions . getJobForWorkflowRun ) . not . toHaveBeenCalled ( ) ;
1792+ } ) ;
1793+
1794+ it ( 'calls publishRetryMessage for each message in a multi-runner scenario' , async ( ) => {
1795+ const messages = createTestMessages ( 5 ) ;
1796+
1797+ await scaleUpModule . scaleUp ( messages ) ;
1798+
1799+ expect ( mockPublishRetryMessage ) . toHaveBeenCalledTimes ( 5 ) ;
1800+ messages . forEach ( ( msg , index ) => {
1801+ expect ( mockPublishRetryMessage ) . toHaveBeenNthCalledWith (
1802+ index + 1 ,
1803+ expect . objectContaining ( {
1804+ id : msg . id ,
1805+ messageId : msg . messageId ,
1806+ } ) ,
1807+ ) ;
1808+ } ) ;
1809+ } ) ;
1810+
1811+ it ( 'calls publishRetryMessage before runner creation' , async ( ) => {
1812+ const messages = createTestMessages ( 1 ) ;
1813+
1814+ const callOrder : string [ ] = [ ] ;
1815+ mockPublishRetryMessage . mockImplementation ( ( ) => {
1816+ callOrder . push ( 'publishRetryMessage' ) ;
1817+ return Promise . resolve ( ) ;
1818+ } ) ;
1819+ mockCreateRunner . mockImplementation ( async ( ) => {
1820+ callOrder . push ( 'createRunner' ) ;
1821+ return [ 'i-12345' ] ;
1822+ } ) ;
1823+
1824+ await scaleUpModule . scaleUp ( messages ) ;
1825+
1826+ expect ( callOrder ) . toEqual ( [ 'publishRetryMessage' , 'createRunner' ] ) ;
1827+ } ) ;
1828+ } ) ;
1829+
16701830function defaultOctokitMockImpl ( ) {
16711831 mockOctokit . actions . getJobForWorkflowRun . mockImplementation ( ( ) => ( {
16721832 data : {
0 commit comments