Skip to content

Commit 36212fe

Browse files
committed
Merge branch 'htc_frameworks' into 'main'
Add htcimager and refactored htclean to main. See merge request ardg/libra!152
2 parents de5ced2 + 719c77a commit 36212fe

File tree

109 files changed

+5838
-337
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

109 files changed

+5838
-337
lines changed

apps/src/tests/test_htclean.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,8 @@ TEST(HtcleanTest, Loops) {
5454
// copy the input .def
5555
std::filesystem::copy(goldDir/"libra_htclean.def", testDir/"libra_htclean.def", copy_options::recursive);
5656
// copy htclean scripts
57-
std::filesystem::copy(goldDir/"../../../../frameworks/htclean/libra_htclean.sh", testDir/"libra_htclean.sh", copy_options::recursive);
58-
std::filesystem::copy(goldDir/"../../../../frameworks/htclean/runapp.sh", testDir/"runapp.sh", copy_options::recursive);
57+
std::filesystem::copy(goldDir/"../../../../install/bin/libra_htclean.sh", testDir/"libra_htclean.sh", copy_options::recursive);
58+
std::filesystem::copy(goldDir/"../../../../install/bin/runapp.sh", testDir/"runapp.sh", copy_options::recursive);
5959
//Step into test dir
6060
std::filesystem::current_path(testDir);
6161

frameworks/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ set(FRAMEWORK_FILES
2020
)
2121

2222
if(DEFINED TOP_LEVEL_DIR AND DEFINED INSTALL_DIR)
23-
install(FILES ${FRAMEWORK_FILES} DESTINATION ${INSTALL_DIR}/bin)
23+
install(FILES ${FRAMEWORK_FILES} PERMISSIONS OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE DESTINATION ${INSTALL_DIR}/bin)
2424
message(STATUS "Scripts installed to ${INSTALL_DIR}/bin")
2525
else()
2626
message(WARNING "TOP_LEVEL_DIR or INSTALL_DIR is not defined, skipping script installation")

frameworks/README.md

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,6 @@
1-
# Description
1+
# Frameworks
22

3-
Framework code for iterative image reconstruction using LibRA [apps](/README.md#currently-available-apps).
3+
## Description
44

5-
## To Do List
5+
Framework code for iterative image reconstruction using LibRA [apps](/README.md#currently-available-apps).
66

7-
- [ ] Make a Python version of libra_htclean.sh. This will allow some error handling as well.
8-
- [ ] Script to deploy libra_htclean in parallel via Slurm. Similar to the one that deploys it on the PATh network using HTCondor scheduler.
9-
- [x] review structure of framework/htclean directory to eliminate duplicates - fmadsen
10-
- [x] integrate new scatter mode from ```/lustre/aoc/sciops/fmadsen/tickets/scg-136/imaging_tests/bin/``` (HTCSynthesisImager.py and htclean.py) - fmadsen
11-
- [ ] merge different templates into a general implementation - fmadsen
12-
- [ ] ~~check if ```roadrunner``` can compute cfcache if it's not provided - sbhatnag~~
13-
- [x] ~~check mode = predict on ```roadrunner``` - sbhatnag~~

frameworks/htcimager/GPUMonitor.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# Copyright (C) 2024
2+
# Associated Universities, Inc. Washington DC, USA.
3+
#
4+
# This library is free software; you can redistribute it and/or modify it
5+
# under the terms of the GNU Library General Public License as published by
6+
# the Free Software Foundation; either version 2 of the License, or (at your
7+
# option) any later version.
8+
#
9+
# This library is distributed in the hope that it will be useful, but WITHOUT
10+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
12+
# License for more details.
13+
#
14+
# You should have received a copy of the GNU Library General Public License
15+
# along with this library; if not, write to the Free Software Foundation,
16+
# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
17+
#
18+
# Correspondence concerning this should be addressed as follows:
19+
# Postal address: National Radio Astronomy Observatory
20+
# 1003 Lopezville Road,
21+
# Socorro, NM - 87801, USA
22+
#
23+
# $Id$
24+
25+
import os
26+
from subprocess import Popen,PIPE
27+
28+
class GPUMonitor(object):
29+
30+
def __init__(self):
31+
self.device = self.getDevice()
32+
self.GPUName = self.getDeviceProperty('name')
33+
self.GPUCapability = self.getDeviceProperty('compute_cap')
34+
self.listSystemGPUs()
35+
36+
def listSystemGPUs(self):
37+
with Popen(['nvidia-smi'], stdout = PIPE) as nvidia_smi:
38+
print(nvidia_smi.stdout.read().decode('ascii'))
39+
40+
def getDevice(self):
41+
try:
42+
device = os.environ['NVIDIA_VISIBLE_DEVICES']
43+
except KeyError:
44+
print('Variable ${NVIDIA_VISIBLE_DEVICES} not set, obtaining device id with uuid. This may be a problem in systems with multiple GPUs.')
45+
cmd = ['nvidia-smi','--query-gpu=uuid','--format=csv,noheader']
46+
with Popen(cmd, stdout = PIPE) as nvidia_smi:
47+
device = nvidia_smi.stdout.read().decode('ascii').strip()
48+
except Exception as e:
49+
raise e
50+
return device
51+
52+
def getDeviceProperty(self, property):
53+
cmd = ['nvidia-smi','--query-gpu=' + property,'--id=' + self.device,'--format=csv,noheader']
54+
with Popen(cmd, stdout = PIPE) as nvidia_smi:
55+
devprop = nvidia_smi.stdout.read().decode('ascii').strip()
56+
return devprop
57+
58+
def startMonitor(self, interval = 1, outputfile = 'GPUMonitor.out'):
59+
cmd = ['nvidia-smi','--query-gpu=timestamp,name,utilization.gpu,utilization.memory,memory.used',
60+
'--format=csv','-l',str(interval),'--id=' + self.device,'--filename=' + outputfile]
61+
self.monitor = Popen(cmd)
62+
63+
def stopMonitor(self):
64+
self.monitor.send_signal(2)
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#!/bin/bash
2+
# Copyright (C) 2024
3+
# Associated Universities, Inc. Washington DC, USA.
4+
#
5+
# This library is free software; you can redistribute it and/or modify it
6+
# under the terms of the GNU Library General Public License as published by
7+
# the Free Software Foundation; either version 2 of the License, or (at your
8+
# option) any later version.
9+
#
10+
# This library is distributed in the hope that it will be useful, but WITHOUT
11+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13+
# License for more details.
14+
#
15+
# You should have received a copy of the GNU Library General Public License
16+
# along with this library; if not, write to the Free Software Foundation,
17+
# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18+
#
19+
# Correspondence concerning this should be addressed as follows:
20+
# Postal address: National Radio Astronomy Observatory
21+
# 1003 Lopezville Road,
22+
# Socorro, NM - 87801, USA
23+
#
24+
# $Id$
25+
26+
27+
usage()
28+
{
29+
echo "Usage: $0 [-s <save logs location>] [-h]"$'\n'\
30+
" -s (optional) is the location where the user wants to save existing logs before cleanup."$'\n'\
31+
" -h prints help and exits"$'\n\n'\
32+
" Example: $0 -s 20151021.logs"$'\n'
33+
}
34+
35+
36+
# Begin execution
37+
38+
39+
while getopts "s:h" option; do
40+
case "$option" in
41+
s) savelogsdir=${OPTARG} ;;
42+
h) usage
43+
exit 0 ;;
44+
*) echo "${option}: Unknown option"
45+
usage
46+
exit 1 ;;
47+
esac
48+
done
49+
50+
cleanupextlist="*.log *.dag.* logs.* *.std*"
51+
52+
if [ ! -z ${savelogsdir} ]
53+
then
54+
mkdir ${savelogsdir}
55+
cp ${cleanupextlist} ${savelogsdir}
56+
fi
57+
58+
59+
rm -rf ${cleanupextlist}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# Copyright (C) 2024
2+
# Associated Universities, Inc. Washington DC, USA.
3+
#
4+
# This library is free software; you can redistribute it and/or modify it
5+
# under the terms of the GNU Library General Public License as published by
6+
# the Free Software Foundation; either version 2 of the License, or (at your
7+
# option) any later version.
8+
#
9+
# This library is distributed in the hope that it will be useful, but WITHOUT
10+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
12+
# License for more details.
13+
#
14+
# You should have received a copy of the GNU Library General Public License
15+
# along with this library; if not, write to the Free Software Foundation,
16+
# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
17+
#
18+
# Correspondence concerning this should be addressed as follows:
19+
# Postal address: National Radio Astronomy Observatory
20+
# 1003 Lopezville Road,
21+
# Socorro, NM - 87801, USA
22+
#
23+
# $Id$
24+
25+
26+
include: imageSolver.htc
27+
include: imcycle.htc
28+
include: input_files.htc
29+
30+
31+
notification = Error
32+
33+
request_memory = $(memModel)
34+
request_disk = $(diskModel)
35+
36+
+DESIRED_SITES = "WISC-PATH"
37+
38+
arguments = "-j $(jobmode) -n $(jobmode) -f $(input_file) -i $(gatherimagelist) -t residual -b $(libra_bundle)"
39+
40+
if $(useStartImages)
41+
input_files = $(input_files), $(imagesdir)/$(baseimagename).residual.start.tgz?pack=auto
42+
endif
43+
44+
transfer_input_files = $(htcimagerfiles), bin/imageSolver.def, $(softwaredir)/$(libra_bundle), $(softwaredir)/casa-data.tgz?pack=auto, $(input_files), $(imagesdir)/$(baseimagename).sumwt.tgz?pack=auto, $(imagesdir)/$(baseimagename).weight.tgz?pack=auto
45+
46+
47+
transfer_output_remaps = "logs.tgz = $(initialdir)/logs.$(jobmode).imcycle$(imcycle).tgz; $(baseimagename).residual.tgz = $(imagesdir)/$(baseimagename).residual.imcycle$(imcycle).noiter.tgz"
48+
49+
50+
queue
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Copyright (C) 2024
2+
# Associated Universities, Inc. Washington DC, USA.
3+
#
4+
# This library is free software; you can redistribute it and/or modify it
5+
# under the terms of the GNU Library General Public License as published by
6+
# the Free Software Foundation; either version 2 of the License, or (at your
7+
# option) any later version.
8+
#
9+
# This library is distributed in the hope that it will be useful, but WITHOUT
10+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
12+
# License for more details.
13+
#
14+
# You should have received a copy of the GNU Library General Public License
15+
# along with this library; if not, write to the Free Software Foundation,
16+
# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
17+
#
18+
# Correspondence concerning this should be addressed as follows:
19+
# Postal address: National Radio Astronomy Observatory
20+
# 1003 Lopezville Road,
21+
# Socorro, NM - 87801, USA
22+
#
23+
# $Id$
24+
25+
26+
include: imageSolver.htc
27+
include: imcycle.htc
28+
include: input_files.htc
29+
30+
31+
notification = Error
32+
33+
request_memory = $(memModel)
34+
request_disk = $(diskModel)
35+
36+
+DESIRED_SITES = "WISC-PATH"
37+
38+
arguments = "-j $(jobmode) -n $(jobmode) -f $(input_file) -i $(gatherimagelist) -b $(libra_bundle)"
39+
40+
41+
if $(useStartImages)
42+
output_files = $(baseimagename).residual.start.tgz
43+
endif
44+
45+
if $(useStartModel)
46+
input_files = $(input_files), $(imagesdir)/$(baseimagename).model.start.tgz?pack=auto
47+
output_files = $(output_files), $(baseimagename).divmodel.tgz
48+
endif
49+
50+
if $(useStartMask)
51+
input_files = $(input_files), $(imagesdir)/$(baseimagename).mask.start.tgz?pack=auto
52+
endif
53+
54+
transfer_input_files = $(htcimagerfiles), bin/imageSolver.def, $(softwaredir)/$(libra_bundle), $(softwaredir)/casa-data.tgz?pack=auto, $(input_files)
55+
56+
transfer_output_files = logs.tgz, $(baseimagename).psf.tgz, $(baseimagename).weight.tgz, $(baseimagename).sumwt.tgz, $(baseimagename).pb.tgz, $(output_files)
57+
58+
transfer_output_remaps = "logs.tgz = $(initialdir)/logs.$(jobmode).imcycle$(imcycle).tgz; $(baseimagename).psf.tgz = $(imagesdir)/$(baseimagename).psf.tgz; $(baseimagename).weight.tgz = $(imagesdir)/$(baseimagename).weight.tgz; $(baseimagename).sumwt.tgz = $(imagesdir)/$(baseimagename).sumwt.tgz; $(baseimagename).pb.tgz = $(imagesdir)/$(baseimagename).pb.tgz; $(baseimagename).divmodel.tgz = $(imagesdir)/$(baseimagename).divmodel.imcycle$(imcycle).tgz; $(baseimagename).residual.start.tgz = $(imagesdir)/$(baseimagename).residual.start.tgz"
59+
60+
queue
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Copyright (C) 2024
2+
# Associated Universities, Inc. Washington DC, USA.
3+
#
4+
# This library is free software; you can redistribute it and/or modify it
5+
# under the terms of the GNU Library General Public License as published by
6+
# the Free Software Foundation; either version 2 of the License, or (at your
7+
# option) any later version.
8+
#
9+
# This library is distributed in the hope that it will be useful, but WITHOUT
10+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
12+
# License for more details.
13+
#
14+
# You should have received a copy of the GNU Library General Public License
15+
# along with this library; if not, write to the Free Software Foundation,
16+
# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
17+
#
18+
# Correspondence concerning this should be addressed as follows:
19+
# Postal address: National Radio Astronomy Observatory
20+
# 1003 Lopezville Road,
21+
# Socorro, NM - 87801, USA
22+
#
23+
# $Id$
24+
25+
26+
notification = Error
27+
28+
cuda_cap = $$([int(GPUs_Capability)])
29+
30+
request_cpus = 2
31+
request_gpus = 1
32+
request_memory = $(memResidual)
33+
request_disk = $(diskResidual)
34+
35+
+UNDESIRED_Sites = $(und_sites_grid)
36+
if $(wantOSPool)
37+
+WantOSPool = true
38+
endif
39+
40+
if $(NRPexclusive)
41+
+DESIRED_SITES = "SDSC-PRP"
42+
endif
43+
44+
arguments = "-j $(jobmode) -n $(jobmode).$(partId) -f $(input_file) -i $(baseimagename).$(partId) -m $(msname) -c $(cfcache) -b $(libra_bundle)"
45+
46+
47+
output = $(jobmode).$(partId).imcycle$(imcycle).$(cuda_cap)x.stdout
48+
error = $(jobmode).$(partId).imcycle$(imcycle).$(cuda_cap)x.stderr
49+
log = $(jobmode).$(partId).imcycle$(imcycle).condor.log
50+
51+
+DESIRED_GPU_MIN_Capability = 7.0
52+
+DESIRED_GPU_MIN_GlobalMemoryMb = 16000
53+
+DESIRED_GPU_MIN_DriverVersion = 12.0
54+
RequireGPUs = (Capability >= DESIRED_GPU_MIN_Capability) && (GlobalMemoryMb > DESIRED_GPU_MIN_GlobalMemoryMb) && (DriverVersion >= DESIRED_GPU_MIN_DriverVersion)
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# Copyright (C) 2024
2+
# Associated Universities, Inc. Washington DC, USA.
3+
#
4+
# This library is free software; you can redistribute it and/or modify it
5+
# under the terms of the GNU Library General Public License as published by
6+
# the Free Software Foundation; either version 2 of the License, or (at your
7+
# option) any later version.
8+
#
9+
# This library is distributed in the hope that it will be useful, but WITHOUT
10+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
12+
# License for more details.
13+
#
14+
# You should have received a copy of the GNU Library General Public License
15+
# along with this library; if not, write to the Free Software Foundation,
16+
# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
17+
#
18+
# Correspondence concerning this should be addressed as follows:
19+
# Postal address: National Radio Astronomy Observatory
20+
# 1003 Lopezville Road,
21+
# Socorro, NM - 87801, USA
22+
#
23+
# $Id$
24+
25+
26+
SUBDAG EXTERNAL initialCycle initialCycle.dag
27+
SUBDAG EXTERNAL imagingCycle imagingCycle.dag
28+
FINAL restore restore.htc
29+
30+
SCRIPT PRE initialCycle set_retry.sh -1
31+
SCRIPT PRE imagingCycle set_retry.sh $RETRY
32+
RETRY imagingCycle 20
33+
SCRIPT POST imagingCycle checkConvergence.py
34+
35+
PARENT initialCycle CHILD imagingCycle
36+
37+
VARS restore jobmode="$(JOB)"
38+
VARS restore input_file="imageSolver.def"
39+
40+
ABORT-DAG-ON imagingCycle 2

0 commit comments

Comments
 (0)