Skip to content

Commit 88a9541

Browse files
authored
Merge pull request #23 from infosiftr/lower-retry
Sort trigger-arch build queue to push previously failed jobs to the end
2 parents b446700 + 7d61be7 commit 88a9541

File tree

1 file changed

+80
-22
lines changed

1 file changed

+80
-22
lines changed

Jenkinsfile.trigger

Lines changed: 80 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@ env.BASHBREW_ARCH = env.JOB_NAME.split('/')[-1].minus('trigger-') // "windows-am
1313
def queue = []
1414
def breakEarly = false // thanks Jenkins...
1515

16+
// this includes the number of attempts per failing buildId
17+
// { buildId: { "count": 1, ... }, ... }
18+
def pastFailedJobsJson = '{}'
19+
1620
node {
1721
stage('Checkout') {
1822
checkout(scmGit(
@@ -32,33 +36,49 @@ node {
3236
[$class: 'RelativeTargetDirectory', relativeTargetDir: 'meta'],
3337
],
3438
))
39+
pastFailedJobsJson = sh(returnStdout: true, script: '''#!/usr/bin/env bash
40+
set -Eeuo pipefail -x
41+
42+
if ! json="$(wget -qO- "$JOB_URL/lastSuccessfulBuild/artifact/pastFailedJobs.json")"; then
43+
echo >&2 'failed to get pastFailedJobs.json'
44+
json='{}'
45+
fi
46+
jq <<<"$json" '.'
47+
''').trim()
3548
}
3649

3750
dir('meta') {
38-
def queueJSON = ''
51+
def queueJson = ''
3952
stage('Queue') {
40-
// TODO this job should export a JSON file that includes the number of attempts so far per failing buildId, and then this list should inject those values, initialize missing to 0, and sort by attempts so that failing builds always live at the bottom of the queue
41-
queueJSON = sh(returnStdout: true, script: '''
42-
jq -L.scripts '
43-
include "meta";
44-
[
45-
.[]
46-
| select(
47-
needs_build
48-
and (
49-
.build.arch as $arch
50-
| if env.BASHBREW_ARCH == "gha" then
51-
[ "amd64", "i386", "windows-amd64" ]
52-
else [ env.BASHBREW_ARCH ] end
53-
| index($arch)
53+
withEnv([
54+
'pastFailedJobsJson=' + pastFailedJobsJson,
55+
]) {
56+
// using pastFailedJobsJson, sort the needs_build queue so that failing builds always live at the bottom of the queue
57+
queueJson = sh(returnStdout: true, script: '''
58+
jq -L.scripts '
59+
include "meta";
60+
(env.pastFailedJobsJson | fromjson) as $pastFailedJobs
61+
| [
62+
.[]
63+
| select(
64+
needs_build
65+
and (
66+
.build.arch as $arch
67+
| if env.BASHBREW_ARCH == "gha" then
68+
[ "amd64", "i386", "windows-amd64" ]
69+
else [ env.BASHBREW_ARCH ] end
70+
| index($arch)
71+
)
5472
)
55-
)
56-
]
57-
' builds.json
58-
''').trim()
73+
]
74+
# this Jenkins job archives a JSON file mapping each failing buildId to its attempt count so far; sorting the queue by that count keeps failing builds at the bottom, with the most-failed builds last
75+
| sort_by($pastFailedJobs[.buildId].count // 0)
76+
' builds.json
77+
''').trim()
78+
}
5979
}
60-
if (queueJSON && queueJSON != '[]') {
61-
queue = readJSON(text: queueJSON)
80+
if (queueJson && queueJson != '[]') {
81+
queue = readJSON(text: queueJson)
6282
currentBuild.displayName = 'queue size: ' + queue.size() + ' (#' + currentBuild.number + ')'
6383
} else {
6484
currentBuild.displayName = 'empty queue (#' + currentBuild.number + ')'
@@ -142,6 +162,8 @@ node {
142162
if (breakEarly) { return } // thanks Jenkins...
143163

144164
// now that we have our parsed queue, we can release the node we're holding up (since we handle GHA builds above)
165+
def pastFailedJobs = readJSON(text: pastFailedJobsJson)
166+
def newFailedJobs = [:]
145167

146168
for (buildObj in queue) {
147169
def identifier = buildObj.source.allTags[0]
@@ -161,12 +183,48 @@ for (buildObj in queue) {
161183
quietPeriod: 5, // seconds
162184
)
163185
// TODO do something useful with "res.result" (especially "res.result != 'SUCCESS'")
164-
// (maybe store "res.startTimeInMillis + res.duration" as endTime so we can implement some amount of backoff somehow?)
165186
echo(res.result)
166187
if (res.result != 'SUCCESS') {
188+
def c = 1
189+
if (pastFailedJobs[buildObj.buildId]) {
190+
// TODO more defensive access of .count? (it is created just below, so it should be safe)
191+
c += pastFailedJobs[buildObj.buildId].count
192+
}
193+
// TODO maybe implement some amount of backoff? keep first url/endTime?
194+
newFailedJobs[buildObj.buildId] = [
195+
count: c,
196+
identifier: identifier,
197+
url: res.absoluteUrl,
198+
endTime: (res.startTimeInMillis + res.duration) / 1000.0, // convert to seconds
199+
]
200+
167201
// "catchError" is the only way to set "stageResult" :(
168202
catchError(message: 'Build of "' + identifier + '" failed', buildResult: 'UNSTABLE', stageResult: 'FAILURE') { error() }
169203
}
170204
}
171205
}
172206
}
207+
208+
// save newFailedJobs so we can use it next run as pastFailedJobs
209+
node {
210+
def newFailedJobsJson = writeJSON(json: newFailedJobs, returnText: true)
211+
withEnv([
212+
'newFailedJobsJson=' + newFailedJobsJson,
213+
]) {
214+
stage('Archive') {
215+
dir('builds') {
216+
deleteDir()
217+
sh '''#!/usr/bin/env bash
218+
set -Eeuo pipefail -x
219+
220+
jq <<<"$newFailedJobsJson" '.' | tee pastFailedJobs.json
221+
'''
222+
archiveArtifacts(
223+
artifacts: '*.json',
224+
fingerprint: true,
225+
onlyIfSuccessful: true,
226+
)
227+
}
228+
}
229+
}
230+
}

0 commit comments

Comments (0)