11#!/usr/bin/python
2- import os
32import sys
43import yaml
54import psutil
@@ -23,7 +22,6 @@ def main(args):
2322 run_name = args ['--run' ]
2423 job = args ['--job' ]
2524 jobspec = args ['--jobspec' ]
26- archive_base = args ['--archive' ]
2725 owner = args ['--owner' ]
2826 machine_type = args ['--machine-type' ]
2927 preserve_queue = args ['--preserve-queue' ]
@@ -35,42 +33,35 @@ def main(args):
3533
3634 if job :
3735 for job_id in job :
38- kill_job (run_name , job_id , archive_base , owner )
36+ kill_job (
37+ run_name ,
38+ job_id ,
39+ owner
40+ )
3941 else :
40- kill_run (run_name , archive_base , owner , machine_type ,
41- preserve_queue = preserve_queue )
42+ kill_run (
43+ run_name ,
44+ owner ,
45+ machine_type ,
46+ preserve_queue = preserve_queue ,
47+ )
4248
4349
44- def kill_run (run_name , archive_base = None , owner = None , machine_type = None ,
50+ def kill_run (run_name , owner = None , machine_type = None ,
4551 preserve_queue = False ):
46- run_info = {}
47- serializer = report .ResultsSerializer (archive_base )
48- if archive_base :
49- run_archive_dir = os .path .join (archive_base , run_name )
50- if os .path .isdir (run_archive_dir ):
51- run_info = find_run_info (serializer , run_name )
52- if 'machine_type' in run_info :
53- machine_type = run_info ['machine_type' ]
54- owner = run_info ['owner' ]
55- else :
56- log .warning ("The run info does not have machine type: %s" % run_info )
57- log .warning ("Run archive used: %s" % run_archive_dir )
58- log .info ("Using machine type '%s' and owner '%s'" % (machine_type , owner ))
59- elif machine_type is None :
60- # no jobs found in archive and no machine type specified,
61- # so we try paddles to see if there is anything scheduled
62- run_info = report .ResultsReporter ().get_run (run_name )
63- machine_type = run_info .get ('machine_type' , None )
64- if machine_type :
65- log .info (f"Using machine type '{ machine_type } ' received from paddles." )
66- else :
67- raise RuntimeError (f"Cannot find machine type for the run { run_name } ; " +
68- "you must also pass --machine-type" )
52+ run_info = report .ResultsReporter ().get_run (run_name )
53+ # run: machine_type, owner
54+ # job: pid, id
55+ machine_type = run_info .get ('machine_type' , None )
6956
7057 if not preserve_queue :
7158 remove_beanstalk_jobs (run_name , machine_type )
7259 remove_paddles_jobs (run_name )
73- if kill_processes (run_name , run_info .get ('pids' )):
60+ pids = []
61+ for job in run_info ['jobs' ]:
62+ if pid := job .get ('pid' ):
63+ pids .append (int (pid ))
64+ if kill_processes (run_name , pids ):
7465 return
7566 if owner is not None :
7667 targets = find_targets (run_name )
@@ -79,7 +70,7 @@ def kill_run(run_name, archive_base=None, owner=None, machine_type=None,
7970 report .try_mark_run_dead (run_name )
8071
8172
82- def kill_job (run_name , job_id , archive_base = None , owner = None , skip_unlock = False ):
73+ def kill_job (run_name , job_id , owner = None , skip_unlock = False ):
8374 job_info = report .ResultsReporter ().get_jobs (run_name , job_id )
8475 if not owner :
8576 if 'owner' not in job_info :
@@ -111,34 +102,6 @@ def kill_job(run_name, job_id, archive_base=None, owner=None, skip_unlock=False)
111102 lock_ops .unlock_safe (list (targets .keys ()), owner , run_name , job_id )
112103
113104
114- def find_run_info (serializer , run_name ):
115- log .info ("Assembling run information..." )
116- run_info_fields = [
117- 'machine_type' ,
118- 'owner' ,
119- ]
120-
121- pids = []
122- run_info = {}
123- job_info = {}
124- job_num = 0
125- jobs = serializer .jobs_for_run (run_name )
126- job_total = len (jobs )
127- for (job_id , job_dir ) in jobs .items ():
128- if not os .path .isdir (job_dir ):
129- continue
130- job_num += 1
131- beanstalk .print_progress (job_num , job_total , 'Reading Job: ' )
132- job_info = serializer .job_info (run_name , job_id , simple = True )
133- for key in job_info .keys ():
134- if key in run_info_fields and key not in run_info :
135- run_info [key ] = job_info [key ]
136- if 'pid' in job_info :
137- pids .append (job_info ['pid' ])
138- run_info ['pids' ] = pids
139- return run_info
140-
141-
142105def remove_paddles_jobs (run_name ):
143106 jobs = report .ResultsReporter ().get_jobs (run_name , fields = ['status' ])
144107 job_ids = [job ['job_id' ] for job in jobs if job ['status' ] == 'queued' ]
@@ -229,7 +192,7 @@ def kill_processes(run_name, pids=None, job_id=None):
229192def process_matches_run (pid , run_name ):
230193 try :
231194 p = psutil .Process (pid )
232- cmd = p .cmdline ()
195+ cmd = ' ' . join ( p .cmdline () )
233196 if run_name in cmd and sys .argv [0 ] not in cmd :
234197 return True
235198 except psutil .NoSuchProcess :
0 commit comments