Skip to content

Commit 71e741d

Browse files
add cancellation support
1 parent 3b443f9 commit 71e741d

File tree

6 files changed

+130
-12
lines changed

6 files changed

+130
-12
lines changed

Database/JobState.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,6 @@ public enum JobState
66
Queued = 1,
77
InProgress = 2,
88
Completed = 3,
9-
Vanished = 4
9+
Vanished = 4,
10+
Cancelled = 5
1011
}

Database/RunnerStatus.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,6 @@ public enum RunnerStatus
1111
Deleted = 6,
1212
Failure = 7,
1313
VanishedOnCloud = 8,
14-
Cleanup = 9
14+
Cleanup = 9,
15+
Cancelled = 10
1516
}

GithubActionsOrchestrator.IntegrationTests/appsettings.test.json

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,57 @@
22
"TestConfiguration": {
33
"RunnerPrefix": "ghr-test",
44
"MinVmId": 20000,
5-
"PveHost": "${PVE_HOST}",
6-
"PveUsername": "${PVE_USERNAME}",
7-
"PvePassword": "${PVE_PASSWORD}",
5+
"PveHost": "10.128.1.2",
6+
"PveUsername": "ghrunners@pve",
7+
"PvePassword": "${PVE_PASSWORD}",
88
"PveTemplate": 170,
99
"ProvisionScriptBaseUrl": "https://example.com/scripts",
1010
"MetricUser": "test-user",
1111
"MetricPassword": "test-password",
1212
"ControllerUrl": "http://localhost:5000",
13-
"GithubAgentVersion": "2.0.0"
13+
"GithubAgentVersion": "2.0.0",
14+
"DbConnectionString": "Host=localhost;Port=5433;Database=github_actions_orchestrator_test;Username=test_user;Password=test_password",
15+
"Sizes": [
16+
{
17+
"Name": "small",
18+
"Arch": "x64",
19+
"VmTypes": [
20+
{
21+
"Cloud": "pve",
22+
"VmType": "2c2g",
23+
"Priority": 1
24+
}
25+
]
26+
},
27+
{
28+
"Name": "medium",
29+
"Arch": "x64",
30+
"VmTypes": [
31+
{
32+
"Cloud": "pve",
33+
"VmType": "4c4g",
34+
"Priority": 1
35+
}
36+
]
37+
}
38+
],
39+
"Profiles": [
40+
{
41+
"Name": "default",
42+
"ScriptName": "ubuntu",
43+
"ScriptVersion": 1,
44+
"OsImageName": "ubuntu-22.04",
45+
"IsCustomImage": false,
46+
"UsePrivateNetworks": false
47+
},
48+
{
49+
"Name": "test-profile",
50+
"ScriptName": "test",
51+
"ScriptVersion": 1,
52+
"OsImageName": "test-image",
53+
"IsCustomImage": true,
54+
"UsePrivateNetworks": false
55+
}
56+
]
1457
}
1558
}

Models/RunnerQueue.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,6 @@ public class RunnerQueue
88
public ConcurrentQueue<DeleteRunnerTask> DeleteTasks { get; } = new();
99

1010
public ConcurrentDictionary<string, CreateRunnerTask> CreatedRunners { get; } = new();
11+
12+
public ConcurrentDictionary<(string Owner, string Repository, string Size, string Profile, string Arch), int> CancelledRunners { get; } = new();
1113
}

PoolManager.cs

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -866,7 +866,39 @@ private async Task<bool> CreateRunner(CreateRunnerTask rt)
866866
{
867867
var db = new ActionsRunnerContext();
868868
var runner = await db.Runners.Include(x => x.Lifecycle).FirstOrDefaultAsync(x => x.RunnerId == rt.RunnerDbId);
869-
869+
870+
// Check if this runner creation should be skipped due to job cancellation
871+
var key = (runner.Owner, rt.RepoName, runner.Size, runner.Profile, runner.Arch);
872+
bool shouldSkip = false;
873+
874+
_queues.CancelledRunners.AddOrUpdate(
875+
key,
876+
0, // If key doesn't exist, don't skip
877+
(k, count) =>
878+
{
879+
if (count > 0)
880+
{
881+
shouldSkip = true;
882+
return count - 1; // Decrement counter
883+
}
884+
return 0;
885+
});
886+
887+
if (shouldSkip)
888+
{
889+
_logger.LogInformation($"Skipping runner creation for cancelled job: Owner={runner.Owner}, Repo={rt.RepoName}, Size={runner.Size}, Profile={runner.Profile}, Arch={runner.Arch}");
890+
891+
runner.Lifecycle.Add(new RunnerLifecycle
892+
{
893+
Status = RunnerStatus.Cancelled,
894+
EventTimeUtc = DateTime.UtcNow,
895+
Event = "Runner creation skipped - job was cancelled"
896+
});
897+
898+
await db.SaveChangesAsync();
899+
return true;
900+
}
901+
870902
// Check if cloud is stable atm
871903

872904
var possibleProviders =

Program.cs

Lines changed: 44 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -306,11 +306,32 @@ private static async Task<IResult> GithubWebhookHandler(HttpRequest request, [Fr
306306
await JobInProgress(workflowJson, logger, jobId, repoName, orgName);
307307
break;
308308
case "completed":
309+
string conclusion = String.Empty;
310+
if (json.RootElement.TryGetProperty("conclusion", out JsonElement conclusionJson))
311+
{
312+
conclusion = conclusionJson.GetString() ?? string.Empty;
313+
}
314+
309315
var dbWorkflowComplete = await db.Jobs.FirstOrDefaultAsync(x => x.GithubJobId == jobId);
310-
dbWorkflowComplete.State = JobState.Completed;
311316
dbWorkflowComplete.CompleteTime = DateTime.UtcNow;
312-
await db.SaveChangesAsync();
313-
await JobCompleted(logger, jobId, poolMgr, repoName, orgName, workflowJson);
317+
bool wasCancelled = false;
318+
switch (conclusion)
319+
{
320+
case "cancelled":
321+
dbWorkflowComplete.State = JobState.Cancelled;
322+
await db.SaveChangesAsync();
323+
wasCancelled = true;
324+
325+
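// TODO: runners that have not started yet are not removed from the creation queue here; JobCompleted (called below) records the cancellation so that CreateRunner can skip them.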
326+
327+
break;
328+
default:
329+
dbWorkflowComplete.State = JobState.Completed;
330+
await db.SaveChangesAsync();
331+
break;
332+
}
333+
334+
await JobCompleted(logger, jobId, poolMgr, repoName, orgName, workflowJson, wasCancelled);
314335
break;
315336
default:
316337
logger.LogWarning("Unknown action. Ignoring");
@@ -488,7 +509,7 @@ private static async Task<IResult> AddRunnerManuallyHandler(HttpRequest request,
488509
return Results.StatusCode(201);
489510
}
490511

491-
private static async Task JobCompleted(ILogger<Program> logger, long jobId, RunnerQueue poolMgr, string repoName, string orgName, JsonElement workflowJson)
512+
private static async Task JobCompleted(ILogger<Program> logger, long jobId, RunnerQueue poolMgr, string repoName, string orgName, JsonElement workflowJson, bool wasCancelled)
492513
{
493514
var db = new ActionsRunnerContext();
494515
var job = await db.Jobs
@@ -523,7 +544,25 @@ await db.Jobs.AddAsync(new Job
523544
logger.LogError($"No VM on record for JobID: {jobId}. Trying to re-link to {runnerName}.");
524545
jobRunner = await db.LinkJobToRunner(jobId, runnerName);
525546

526-
if (jobRunner == null)
547+
if (wasCancelled && jobRunner == null)
548+
{
549+
// The job was cancelled before any runner picked it up.
550+
// Its runner is either still in the creation queue or still provisioning.
551+
552+
// Get arch from the requested size
553+
string arch = Config.Sizes.FirstOrDefault(x => x.Name == job.RequestedSize)?.Arch ?? "x64";
554+
555+
// Build the cancellation key
556+
var key = (job.Owner, job.Repository, job.RequestedSize, job.RequestedProfile, arch);
557+
558+
// Increment the cancelled runners counter
559+
int count = poolMgr.CancelledRunners.AddOrUpdate(key, 1, (k, currentCount) => currentCount + 1);
560+
561+
logger.LogInformation($"Registered cancelled job {jobId} for {job.Owner}/{job.Repository} size={job.RequestedSize} profile={job.RequestedProfile} arch={arch}. Counter now at {count}.");
562+
563+
return;
564+
}
565+
else if (jobRunner == null)
527566
{
528567
logger.LogError("Unable to link runner. aborting");
529568
return;

0 commit comments

Comments
 (0)