1
1
import datetime
2
2
3
- from treeherder .model .models import Group , GroupStatus , Job , Push
3
+ from treeherder .model .models import Group , GroupStatus , Job , JobLog , Push
4
+
5
+
6
def _check_and_mark_infra(current_job, job_ids, push_ids):
    """Reclassify infra-style failures of this job type as intermittent.

    Finds jobs of the same job_type (on the pushes of interest) that failed
    without reporting groups (infra/timeout/etc.).  If the failure rate among
    the comparable jobs is <= 50%, the unclassified failures are marked
    ``intermittent_needs_classification`` (failure_classification_id=8).

    current_job - Job object of incoming job we are parsing
    job_ids - list of all job_ids found in previous query
    push_ids - ids of pushes we care about from previous query
    """
    if current_job.result != "success":
        # if new job is broken, then only look on same push
        # otherwise it could be a new failure.
        push_ids = [current_job.push.id]

    # look for all jobs in push_ids matching current_job.job_type.name
    # if older are failing for "infra", then ensure same job is passing
    # if so mark as intermittent.
    # NOTE(review): `__range` expects (low, high); this assumes push_ids is
    # ordered newest-first (descending id) — confirm against the caller.
    extra_jobs = JobLog.objects.filter(
        job__push__id__range=(push_ids[-1], push_ids[0]),
        job__push__repository__id=current_job.repository.id,
        job__job_type__name=current_job.job_type.name,
        job__failure_classification_id__in=[1, 6],
        status__in=(1, 2, 3),  # ignore pending
        job__result__in=[
            "busted",
            "testfailed",
            "exception",
            "success",
        ],  # primarily ignore retry/usercancel
    ).values(
        "job__id",
        "job__result",
        "job__failure_classification_id",
    )

    if len(extra_jobs) == 0:
        return

    # ensure 50% 'success' rate
    # success here means the task ran and produced groups | is success
    # jobs without groups (like marionette) will still get tallied properly here
    known_job_ids = set(job_ids)  # O(1) membership instead of list scan per row
    extra_failed = [
        job
        for job in extra_jobs
        if job["job__id"] not in known_job_ids and job["job__result"] != "success"
    ]

    # look for failure rate > 50% and exit early
    # (len(extra_jobs) > 0 is guaranteed by the early return above)
    if len(extra_failed) / len(extra_jobs) > 0.5:
        return

    # any extra_failed entries are failures without groups (infra/timeout/etc.)
    # theoretically there could be many jobs here
    # mark them as `intermittent_needs_classification` (8), skipping jobs
    # already classified as 4 or 8 (presumably intermittent-related — verify
    # against FailureClassification fixtures)
    for job in extra_failed:
        if job["job__failure_classification_id"] not in [4, 8]:
            Job.objects.filter(id=job["job__id"]).update(failure_classification_id=8)
4
59
5
60
6
61
def check_and_mark_intermittent (job_id ):
@@ -52,7 +107,7 @@ def check_and_mark_intermittent(job_id):
52
107
job_logs__job__result__in = [
53
108
"success" ,
54
109
"testfailed" ,
55
- ], # primarily ignore retry/usercancel
110
+ ], # primarily ignore retry/usercancel/unknown
56
111
group_result__status__in = [GroupStatus .OK , GroupStatus .ERROR ],
57
112
)
58
113
.values (
@@ -65,6 +120,11 @@ def check_and_mark_intermittent(job_id):
65
120
.order_by ("-job_logs__job__push__id" )
66
121
)
67
122
123
+ # If no groups, look for infra
124
+ distinct_job_ids = list (set ([f ["job_logs__job__id" ] for f in all_groups ]))
125
+ if len (distinct_job_ids ) == 1 :
126
+ return _check_and_mark_infra (current_job , distinct_job_ids , ids )
127
+
68
128
mappings = {}
69
129
for item in all_groups :
70
130
jobname = item ["job_logs__job__job_type__name" ].strip ("-cf" )
@@ -146,10 +206,9 @@ def check_and_mark_intermittent(job_id):
146
206
):
147
207
target_job = Job .objects .filter (id = job )
148
208
149
- # TODO: infra would be nice to detect, but in the case of no groups, our data set == []
150
- # edge case is all groups originally pass and then shutdown leaks cause 'testfailed'.
151
- # also we ignore infra/leaks that don't report group failures in errorsummary files
152
209
if target_job [0 ].result != "success" and target_job [
153
210
0
154
211
].failure_classification_id not in [4 , 8 ]:
155
212
target_job .update (failure_classification_id = 8 )
213
+
214
+ return _check_and_mark_infra (current_job , distinct_job_ids , ids )
0 commit comments