@@ -1586,6 +1586,12 @@ def _cluster_id_default(self):
1586
1586
help = "The name of the command line program used to delete jobs." ,
1587
1587
)
1588
1588
1589
+ signal_command = List (
1590
+ ['' ],
1591
+ config = True ,
1592
+ help = "The name of the command line program used to send signals to jobs." ,
1593
+ )
1594
+
1589
1595
job_id = Unicode ().tag (to_dict = True )
1590
1596
1591
1597
job_id_regexp = CRegExp (
@@ -1616,7 +1622,7 @@ def _cluster_id_default(self):
1616
1622
def _queue_changed (self , change ):
1617
1623
self ._update_context (change )
1618
1624
1619
- n = Integer (1 )
1625
+ n = Integer (1 ). tag ( to_dict = True )
1620
1626
1621
1627
@observe ('n' )
1622
1628
def _n_changed (self , change ):
@@ -1643,7 +1649,7 @@ def _n_changed(self, change):
1643
1649
This lets you parameterize additional options,
1644
1650
such as wall_time with a custom template.
1645
1651
""" ,
1646
- )
1652
+ ). tag ( to_dict = True )
1647
1653
1648
1654
@default ("context" )
1649
1655
def _context_default (self ):
@@ -1714,7 +1720,7 @@ def write_batch_script(self, n=1):
1714
1720
# from user config
1715
1721
ns .update (self .namespace )
1716
1722
script_as_string = self .formatter .format (self .batch_template , ** ns )
1717
- self .log .debug ('Writing batch script: %s' , self .batch_file )
1723
+ self .log .debug (f 'Writing batch script: { self .batch_file } \n { script_as_string } ' )
1718
1724
with open (self .batch_file , 'w' ) as f :
1719
1725
f .write (script_as_string )
1720
1726
os .chmod (self .batch_file , stat .S_IRUSR | stat .S_IWUSR | stat .S_IXUSR )
@@ -1739,35 +1745,44 @@ def start(self, n=1):
1739
1745
# Here we save profile_dir in the context so they
1740
1746
# can be used in the batch script template as {profile_dir}
1741
1747
self .write_batch_script (n )
1748
+
1742
1749
output = check_output (self .args , env = os .environ )
1743
1750
output = output .decode (DEFAULT_ENCODING , 'replace' )
1751
+ self .log .debug (f"Submitted { shlex_join (self .args )} . Output: { output } " )
1744
1752
1745
1753
job_id = self .parse_job_id (output )
1746
1754
self .notify_start (job_id )
1747
1755
return job_id
1748
1756
1749
1757
def stop(self):
    """Delete the batch job and report the delete command's output.

    Runs ``self.delete_command + [self.job_id]`` and decodes its stdout.
    A failure to run the command (including a non-zero exit status, which
    ``check_output`` raises on) is logged rather than propagated, and the
    output is then reported as an empty string.

    Returns
    -------
    str
        Decoded stdout of the delete command, or ``""`` if it failed.
    """
    # Build the command once so the logged command is exactly what was run.
    cmd = self.delete_command + [self.job_id]
    try:
        output = check_output(
            cmd,
            stdin=None,
        ).decode(DEFAULT_ENCODING, 'replace')
    except Exception:
        # Best effort: stopping must still notify observers below.
        self.log.exception("Problem stopping cluster with command: %s", cmd)
        output = ""

    self.notify_stop(
        dict(job_id=self.job_id, output=output)
    )  # Pass the output of the kill cmd
    return output
1770
1774
1775
def signal(self, sig):
    """Send signal ``sig`` to the batch job via the signal command.

    Runs ``self.signal_command + [str(sig), self.job_id]`` and decodes its
    stdout. A failure to run the command (including a non-zero exit
    status, which ``check_output`` raises on) is logged rather than
    propagated.

    Parameters
    ----------
    sig
        The signal to send; it is converted with ``str()`` before being
        passed on the command line.

    Returns
    -------
    str
        Decoded stdout of the signal command, or ``""`` if it failed.
    """
    cmd = self.signal_command + [str(sig), self.job_id]
    try:
        output = check_output(
            cmd,
            stdin=None,
        ).decode(DEFAULT_ENCODING, 'replace')
    except Exception:
        # The original message lacked the f-prefix and logged the literal
        # "{shlex_join(cmd)}" placeholder instead of the command.
        self.log.exception("Problem sending signal with: %s", shlex_join(cmd))
        output = ""
    return output
1785
+
1771
1786
1772
1787
class BatchControllerLauncher (BatchSystemLauncher , ControllerLauncher ):
1773
1788
@default ("program" )
@@ -1813,6 +1828,9 @@ class PBSLauncher(BatchSystemLauncher):
1813
1828
1814
1829
submit_command = List (['qsub' ], config = True , help = "The PBS submit command ['qsub']" )
1815
1830
delete_command = List (['qdel' ], config = True , help = "The PBS delete command ['qdel']" )
1831
# Command prefix used to signal a PBS job; the signal and the job id are
# appended by the caller. The help text quotes the full default, matching
# the Slurm and LSF launchers.
signal_command = List(
    ['qsig', '-s'], config=True, help="The PBS signal command ['qsig', '-s']"
)
1816
1834
job_id_regexp = CRegExp (
1817
1835
r'\d+' ,
1818
1836
config = True ,
@@ -1868,6 +1886,11 @@ class SlurmLauncher(BatchSystemLauncher):
1868
1886
delete_command = List (
1869
1887
['scancel' ], config = True , help = "The slurm delete command ['scancel']"
1870
1888
)
1889
+ signal_command = List (
1890
+ ['scancel' , '-s' ],
1891
+ config = True ,
1892
+ help = "The slurm signal command ['scancel', '-s']" ,
1893
+ )
1871
1894
job_id_regexp = CRegExp (
1872
1895
r'\d+' ,
1873
1896
config = True ,
@@ -2023,9 +2046,12 @@ class SGEEngineSetLauncher(SGELauncher, BatchEngineSetLauncher):
2023
2046
class LSFLauncher (BatchSystemLauncher ):
2024
2047
"""A BatchSystemLauncher subclass for LSF."""
2025
2048
2026
- submit_command = List (['bsub' ], config = True , help = "The PBS submit command ['bsub']" )
2049
+ submit_command = List (['bsub' ], config = True , help = "The LSF submit command ['bsub']" )
2027
2050
delete_command = List (
2028
- ['bkill' ], config = True , help = "The PBS delete command ['bkill']"
2051
+ ['bkill' ], config = True , help = "The LSF delete command ['bkill']"
2052
+ )
2053
+ signal_command = List (
2054
+ ['bkill' , '-s' ], config = True , help = "The LSF signal command ['bkill', '-s']"
2029
2055
)
2030
2056
job_id_regexp = CRegExp (
2031
2057
r'\d+' ,
0 commit comments