It attempts to group logically identical failure reasons together, and then
outputs a list of observed failure reasons, ordered by frequency.

The script also extracts NODE_NAME from the Jenkins console output to help
identify which Jenkins nodes experienced failures. If no node name is found,
it is reported as 'Unknown'.

Note: This is _very_ rough-and-ready - it "works" in that it extracts useful
information from our CV jobs, but it's likely very specialised to the currently
observed test failures - i.e. the filtering in filter_failed_builds() will
3842import os
3943import re
4044import sys
45+ import requests
4146
4247try :
4348 import jenkins
@@ -79,6 +84,14 @@ def jenkins_request(
7984 req .url += 'tree=' + self .tree_filter
8085 return super ().jenkins_request (req , add_crumb , resolve_auth , stream )
8186
class SessionWithAuth(requests.Session):
    """
    A requests session that remembers a username/password pair.

    The credentials are stored on the session's ``auth`` attribute at
    construction time, so callers do not have to pass them on every request.
    """

    def __init__(self, username, password):
        super().__init__()
        credentials = (username, password)
        self.auth = credentials
94+
8295
8396def init_worker (function , url , username , password ):
8497 """Initialise a multiprocessing worker by establishing a connection to
@@ -88,6 +101,42 @@ def init_worker(function, url, username, password):
88101 # We only ever access these fields from these code paths.
89102 function .server .tree_filter = 'url,result,timestamp,actions[parameters[*],foundFailureCauses[*]]'
90103
104+ # Initialize for HTTP requests
105+ function .request_session = SessionWithAuth (username , password )
106+
def get_node_name(server, job, number, session, max_bytes=10000):
    """Return the Jenkins NODE_NAME recorded in a build's console output.

    Only the start of the console text is downloaded: reading stops as soon
    as a complete ``NODE_NAME=<value>`` token has been seen, or once roughly
    `max_bytes` of text has been read without finding one.

    This is a best-effort lookup; every failure is logged and reported as
    None rather than raised.

    Args:
        server: Object whose ``server`` attribute is the Jenkins base URL
            (the URL is built as ``<server.server>job/...``, so it is
            expected to end with '/').
        job: Job name. Names starting with 'kv_engine-windows-' are used
            verbatim; any other name is treated as a '/'-separated folder
            path (e.g. 'kv_engine.ASan-UBSan/master').
        number: Build number.
        session: Object providing a requests-style ``get(url, stream=...,
            timeout=...)`` method.
        max_bytes: Approximate amount of console text to examine before
            giving up (measured in characters after decoding).

    Returns:
        The node name as a string, or None if it could not be determined.
    """
    if job.startswith('kv_engine-windows-'):
        # Sample console URL: /job/kv_engine-windows-master/63025/consoleText
        console_url = f"{server.server}job/{job}/{number}/consoleText"
    else:
        # Every path component of the job name needs its own 'job/' URL
        # segment. Joining the components (instead of unpacking exactly two)
        # avoids a ValueError for names with zero or several '/'.
        # Sample console URL: /job/kv_engine.ASan-UBSan/job/master/43301/consoleText
        job_path = '/job/'.join(job.split('/'))
        console_url = f"{server.server}job/{job_path}/{number}/consoleText"

    try:
        response = session.get(console_url, stream=True, timeout=30)
        try:
            response.raise_for_status()

            # Node name is normally in the first ~5000 bytes, however we look
            # at up to max_bytes to be safe. The stream may hand us pieces
            # smaller than chunk_size, so accumulate them rather than only
            # inspecting the first one.
            text = ''
            for chunk in response.iter_content(decode_unicode=True,
                                               chunk_size=max_bytes):
                if isinstance(chunk, bytes):
                    # No encoding was detected for the response, so the
                    # chunk was not decoded for us.
                    chunk = chunk.decode('utf-8', errors='replace')
                text += chunk
                match = re.search(r'NODE_NAME=([^\s]+)', text)
                # Only accept a match whose value is followed by more text;
                # otherwise the name may continue in the next chunk.
                if match and match.end() < len(text):
                    return match.group(1)
                if len(text) >= max_bytes:
                    break

            # Stream ended (or limit reached): a match that runs up to the
            # end of the buffered text is the best we can get.
            match = re.search(r'NODE_NAME=([^\s]+)', text)
            if match:
                return match.group(1)
        finally:
            # With stream=True the connection is held until the body is
            # consumed or the response is closed; we stop reading early, so
            # close explicitly.
            response.close()

        logging.warning(f"Failed to find NODE_NAME for {job}-{number}")

    except requests.exceptions.RequestException as e:
        logging.debug(f"get_node_name: Failed to fetch from {console_url}: {e}")
        return None
    except Exception as e:
        logging.warning(f"get_node_name: Failed to fetch console output for {job}-{number}: {e}")
        return None

    return None
91140
92141def get_build_info (build ):
93142 """For the given build job and number, download the information for
@@ -105,6 +154,15 @@ def get_build_info(build):
105154 if result in ('SUCCESS' , 'ABORTED' ):
106155 # Job succeeded or explicitly aborted - skip
107156 return
157+
158+ session = get_build_info .request_session
159+ node_name = get_node_name (get_build_info .server , job , number , session )
160+ if node_name :
161+ info ['node_name' ] = node_name
162+ logging .debug ("Build: {}-{}: Node: {}" .format (job , number , node_name ))
163+ else :
164+ info ['node_name' ] = 'Unknown'
165+
108166 key = job + "-" + str (number )
109167 return (key , info )
110168
@@ -189,7 +247,8 @@ def extract_failed_builds(details):
189247 failures [description ].append ({'description' : description ,
190248 'gerrit_patch' : gerrit_patch ,
191249 'timestamp' : timestamp ,
192- 'url' : info ['url' ]})
250+ 'url' : info ['url' ],
251+ 'node_name' : info ['node_name' ]})
193252 if not description :
194253 logging .warning (
195254 "extract_failed_builds: Did not find failure cause for " +
@@ -459,8 +518,9 @@ def include(elem):
459518 (num_failures * 100.0 ) / total_failures ))
460519 for d_idx , d in enumerate (details [:100 ]):
461520 human_time = d ['timestamp' ].strftime ('%Y-%m-%d %H:%M:%S' )
462- print ("* Time: {}, Jenkins job: {}, patch: {}" .format (human_time ,
463- d ['url' ], d ['gerrit_patch' ]))
521+ node_name = d ['node_name' ]
522+ print ("* Time: {}, Jenkins job: {}, patch: {}, node: {}" .format (human_time ,
523+ d ['url' ], d ['gerrit_patch' ], node_name ))
464524 if len (d ['variables' ]) > 0 :
465525 print (' `- where ' , end = '' )
466526 for name , value in d ['variables' ].items ():
0 commit comments