Skip to content

Commit 50ce058

Browse files
authored
support reclassifying jobs to failures when new additional failures come in (#8923)
1 parent 9a83fdd commit 50ce058

File tree

2 files changed

+70
-7
lines changed

2 files changed

+70
-7
lines changed

tests/log_parser/test_store_failure_lines.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -449,3 +449,39 @@ def test_retrigger_intermittent(activate_responses, hundred_job_blobs, mock_pars
449449
# this will parse and check for intermittents
450450
mock_full_log_parser(job_logs, mock_parser)
451451
verify_classification_id(jobs, 8, 8)
452+
453+
454+
def test_reclassify_group_intermittent(
455+
activate_responses, hundred_job_blobs, mock_parser, create_jobs
456+
):
457+
# test fails, retrigger has different failures on same group, then a third job reuses the first job's log; NOTE(review): expected ids below are (1, 8) rather than both 8 - presumably the repeated failure moves a job back to fcid=1, confirm against verify_classification_id
458+
log_filenames = [
459+
"mochitest-browser-chrome_errorsummary.log",
460+
"mochitest-browser-chrome_2_errorsummary.log",
461+
"mochitest-browser-chrome_errorsummary.log",
462+
]
463+
jobs = create_errorsummary_job(hundred_job_blobs[0], create_jobs, log_filenames)
464+
job_logs = JobLog.objects.filter(job_id__in=(j.id for j in jobs))
465+
assert len(jobs) == len(log_filenames)
466+
467+
# this will parse and check for intermittents
468+
mock_full_log_parser(job_logs, mock_parser)
469+
verify_classification_id(jobs, 1, 8)
471+
472+
def test_reclassify_infra_intermittent(
473+
activate_responses, hundred_job_blobs, mock_parser, create_jobs
474+
):
475+
# infra failure, passing retrigger, then the same infra failure log again; both expected ids below are 1 (presumably no job stays classified as intermittent, fcid=8 - confirm against verify_classification_id)
476+
log_filenames = [
477+
"mochitest-browser-chrome_infra_errorsummary.log",
478+
"mochitest-browser-chrome_pass_errorsummary.log",
479+
"mochitest-browser-chrome_infra_errorsummary.log",
480+
]
481+
jobs = create_errorsummary_job(hundred_job_blobs[0], create_jobs, log_filenames)
482+
job_logs = JobLog.objects.filter(job_id__in=(j.id for j in jobs))
483+
assert len(jobs) == len(log_filenames)
484+
485+
# this will parse and check for intermittents
486+
mock_full_log_parser(job_logs, mock_parser)
487+
verify_classification_id(jobs, 1, 1)

treeherder/log_parser/intermittents.py

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def _check_and_mark_infra(current_job, job_ids, push_ids):
2121
push__id__range=(push_ids[-1], push_ids[0]),
2222
repository__id=current_job.repository.id,
2323
job_type__name=current_job.job_type.name,
24-
failure_classification_id__in=[1, 6],
24+
failure_classification_id__in=[1, 6, 8],
2525
job_log__status__in=[1, 3], # ignore pending, failed
2626
state="completed", # ignore running/pending
2727
result__in=[
@@ -36,7 +36,8 @@ def _check_and_mark_infra(current_job, job_ids, push_ids):
3636
"failure_classification_id",
3737
)
3838

39-
if len(extra_jobs) == 0:
39+
# ignore previously classified jobs (fcid=8); we are looking for NEW extra jobs
40+
if len([ej for ej in extra_jobs if ej["failure_classification_id"] != 8]) == 0:
4041
return
4142

4243
# ensure 50% 'success' rate
@@ -49,6 +50,10 @@ def _check_and_mark_infra(current_job, job_ids, push_ids):
4950

5051
# look for failure rate > 50% and exit early
5152
if len(extra_failed) / len(extra_jobs) > 0.5:
53+
# as failure rate > 50%, if any jobs are fc_id=8 classify as fc_id=1
54+
for job in extra_failed:
55+
if job["failure_classification_id"] == 8:
56+
Job.objects.filter(id=job["id"]).update(failure_classification_id=1)
5257
return
5358

5459
# any extra_jobs will be failures without groups (infra/timeout/etc.)
@@ -117,6 +122,7 @@ def check_and_mark_intermittent(job_id):
117122
"group_result__status",
118123
"job_logs__job__job_type__name",
119124
"job_logs__job__push__id",
125+
"job_logs__job__failure_classification__id",
120126
)
121127
.order_by("-job_logs__job__push__id")
122128
)
@@ -127,6 +133,7 @@ def check_and_mark_intermittent(job_id):
127133
return _check_and_mark_infra(current_job, distinct_job_ids, ids)
128134

129135
mappings = {}
136+
job_classifications = {}
130137
for item in all_groups:
131138
jobname = item["job_logs__job__job_type__name"].strip("-cf")
132139
try:
@@ -139,6 +146,12 @@ def check_and_mark_intermittent(job_id):
139146
# we have a variant
140147
continue
141148

149+
# store job:fc_id so we can reference what needs to be changed
150+
if item["job_logs__job__id"] not in job_classifications:
151+
job_classifications[item["job_logs__job__id"]] = item[
152+
"job_logs__job__failure_classification__id"
153+
]
154+
142155
if item["job_logs__job__push__id"] not in mappings:
143156
mappings[item["job_logs__job__push__id"]] = {"groups": {}, "jobs": {}}
144157
groups = mappings[item["job_logs__job__push__id"]]["groups"]
@@ -186,6 +199,8 @@ def check_and_mark_intermittent(job_id):
186199

187200
# all changed_groups need to be evaluated on previous 'failed' jobs to ensure all groups in that task are 'passing'
188201
for id in mappings.keys():
202+
jobs_to_classify = [] # mark as fcid=8 (known intermittent)
203+
jobs_to_unclassify = [] # previously parked as fcid=8, new failing data, now fcid=1
189204
for job in mappings[id]["jobs"]:
190205
all_green = True
191206
current_all_green = True
@@ -205,11 +220,23 @@ def check_and_mark_intermittent(job_id):
205220
if (id == current_job.push.id and current_all_green) or (
206221
id != current_job.push.id and len(ids) > 1 and all_green
207222
):
208-
target_job = Job.objects.filter(id=job)
223+
jobs_to_classify.append(job)
224+
elif job_classifications[job] == 8:
225+
jobs_to_unclassify.append(job)
226+
227+
# TODO: consider job.result=(busted, exception)
228+
for job in jobs_to_classify:
229+
target_job = Job.objects.filter(
230+
id=job, result="testfailed", failure_classification_id__in=[1, 6]
231+
)
232+
if target_job:
233+
target_job.update(failure_classification_id=8)
209234

210-
if target_job[0].result != "success" and target_job[
211-
0
212-
].failure_classification_id not in [4, 8]:
213-
target_job.update(failure_classification_id=8)
235+
for job in jobs_to_unclassify:
236+
target_job = Job.objects.filter(
237+
id=job, result="testfailed", failure_classification_id=8
238+
)
239+
if target_job:
240+
target_job.update(failure_classification_id=1)
214241

215242
return _check_and_mark_infra(current_job, distinct_job_ids, ids)

0 commit comments

Comments
 (0)