Skip to content

Commit c70ecb7

Browse files
Merge pull request #17 from etive-io/fix-interest-dict
Temporarily remove interest code from monitor loop
2 parents 0ff1018 + a15de1d commit c70ecb7

File tree

3 files changed

+52
-83
lines changed

3 files changed

+52
-83
lines changed

.github/workflows/htcondor-tests.yml

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ jobs:
5050
with:
5151
miniconda-version: "latest"
5252
auto-activate-base: false
53-
python-version: "3.9"
53+
python-version: "3.10"
5454
activate-environment: asimov
5555
channels: conda-forge
5656
conda-remove-defaults: "true"
@@ -59,7 +59,7 @@ jobs:
5959
uses: actions/cache@v4
6060
with:
6161
path: /usr/share/miniconda/envs/asimov
62-
key: conda-env-asimov-${{ runner.os }}-py39-${{ hashFiles('conda/environment.yaml') }}
62+
key: conda-env-asimov-${{ runner.os }}-py310-${{ hashFiles('conda/environment.yaml') }}
6363
restore-keys: |
6464
conda-env-asimov-${{ runner.os }}-
6565
@@ -68,6 +68,7 @@ jobs:
6868
conda install -y -n asimov --file conda/environment.yaml
6969
conda install -y -n asimov conda-build
7070
pip install .
71+
pip install -U bilby bilby_pipe
7172
# Temporarily use the asimov-gwdata branch
7273
pip install git+https://git.ligo.org/asimov/pipelines/gwdata.git@update-htcondor
7374
@@ -209,16 +210,16 @@ jobs:
209210
$ASIMOV_CMD asimov manage build
210211
$ASIMOV_CMD asimov manage submit
211212
sleep 10
212-
sleep 60
213213
214214
condor_q || true
215-
echo 'Waiting for PSD files to appear...'
216-
TIMEOUT=1200; ELAPSED=0; INTERVAL=30
215+
echo 'Waiting for result file to appear...'
216+
TIMEOUT=2400; ELAPSED=0; INTERVAL=30
217217
while [ \$ELAPSED -lt \$TIMEOUT ]; do
218218
condor_q || true
219219
RESULT_COUNT=0
220-
tail -n 20 working/GW150914_095045/bilby-IMRPhenomXPHM/log_*/*.err || true
221-
for f in working/GW150914_095045/bilby-IMRPhenomXPHM/*merge*_result.hdf5; do
220+
tail -n 1 working/GW150914_095045/bilby-IMRPhenomXPHM/log_data_analysis/*.out || true
221+
ls -lh working/GW150914_095045/bilby-IMRPhenomXPHM/result/ || true
222+
for f in working/GW150914_095045/bilby-IMRPhenomXPHM/final_result/*.hdf5; do
222223
[ -e "\$f" ] && RESULT_COUNT=\$((RESULT_COUNT + 1))
223224
done
224225
if [ \$RESULT_COUNT -gt 0 ]; then
@@ -236,4 +237,4 @@ jobs:
236237
exit 1
237238
fi
238239
$ASIMOV_CMD asimov monitor
239-
EOF
240+
EOF

asimov/cli/manage.py

Lines changed: 40 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -455,84 +455,51 @@ def submit(event, update, dryrun):
455455
production.status = "running"
456456
else:
457457
pipe = production.pipeline
458-
# check the priority status to see if we need to start
459-
# the analysis
460-
to_analyse = True
461-
if production.status not in {"ready"}:
462-
to_analyse = False
463-
else:
464-
# verify priority method to be used
465-
priority_method = check_priority_method(production)
466-
if priority_method == "vanilla":
467-
N_ok = 0
468-
for prod in production._needs:
469-
if interest_dict_single_analysis[production.event.name][prod]['done']:
470-
N_ok += 1
471-
if N_ok < len(production._needs):
472-
to_analyse = False
473-
elif priority_method == "is_interesting":
474-
if "minimum" in production.meat["needs settings"].keys():
475-
N_target = int(production.meta["needs settings"]["minimum"])
476-
else:
477-
# all pipelines should indicate the run as interesting
478-
N_target = len(production._needs)
479-
for prod in production._needs:
480-
if interest_dict_single_analysis[production.event.name][prod]['interest status']:
481-
N_ok += 1
482-
if N_ok < N_target:
483-
to_analyse = False
484-
else:
485-
raise ValueError(f"Priority method {priority_method} not recognized")
486-
if to_analyse:
487-
try:
488-
pipe.build_dag(dryrun=dryrun)
489-
except PipelineException as e:
490-
logger.error(
491-
"failed to build a DAG file.",
492-
)
493-
logger.exception(e)
494-
click.echo(
495-
click.style("●", fg="red")
496-
+ f" Unable to submit {production.name}"
497-
)
498-
except ValueError:
499-
logger.info("Unable to submit an unbuilt production")
458+
459+
try:
460+
pipe.build_dag(dryrun=dryrun)
461+
except PipelineException as e:
462+
logger.error(
463+
"failed to build a DAG file.",
464+
)
465+
logger.exception(e)
466+
click.echo(
467+
click.style("●", fg="red")
468+
+ f" Unable to submit {production.name}"
469+
)
470+
except ValueError:
471+
logger.info("Unable to submit an unbuilt production")
472+
click.echo(
473+
click.style("●", fg="red")
474+
+ f" Unable to submit {production.name} as it hasn't been built yet."
475+
)
476+
click.echo("Try running `asimov manage build` first.")
477+
try:
478+
pipe.submit_dag(dryrun=dryrun)
479+
if not dryrun:
500480
click.echo(
501-
click.style("●", fg="red")
502-
+ f" Unable to submit {production.name} as it hasn't been built yet."
481+
click.style("●", fg="green")
482+
+ f" Submitted {production.event.name}/{production.name}"
503483
)
504-
click.echo("Try running `asimov manage build` first.")
505-
try:
506-
pipe.submit_dag(dryrun=dryrun)
507-
if not dryrun:
508-
click.echo(
509-
click.style("●", fg="green")
510-
+ f" Submitted {production.event.name}/{production.name}"
511-
)
512-
production.status = "running"
484+
production.status = "running"
513485

514-
except PipelineException as e:
515-
production.status = "stuck"
516-
click.echo(
517-
click.style("●", fg="red")
518-
+ f" Unable to submit {production.name}"
519-
)
520-
logger.exception(e)
521-
ledger.update_event(event)
522-
logger.error(
523-
f"The pipeline failed to submit the DAG file to the cluster. {e}",
524-
)
525-
if not dryrun:
526-
# Refresh the job list
527-
job_list = condor.CondorJobList()
528-
job_list.refresh()
529-
# Update the ledger
530-
ledger.update_event(event)
531-
else:
486+
except PipelineException as e:
487+
production.status = "stuck"
532488
click.echo(
533-
click.style("●", fg="yellow")
534-
+ f"Production {production.name} not ready to submit"
489+
click.style("●", fg="red")
490+
+ f" Unable to submit {production.name}"
535491
)
492+
logger.exception(e)
493+
ledger.update_event(event)
494+
logger.error(
495+
f"The pipeline failed to submit the DAG file to the cluster. {e}",
496+
)
497+
if not dryrun:
498+
# Refresh the job list
499+
job_list = condor.CondorJobList()
500+
job_list.refresh()
501+
# Update the ledger
502+
ledger.update_event(event)
536503

537504
@click.option(
538505
"--event",

tests/test_blueprints/gwosc_quick_test.yaml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,13 @@ pipelines:
1818
V1: V1:DQ_ANALYSIS_STATE_VECTOR
1919
sampler:
2020
sampler: dynesty
21-
parallel jobs: 2
22-
sampler kwargs: "{nlive: 100, dlogz: 1, nact: 5}"
21+
parallel jobs: 1
22+
sampler kwargs: "{nlive: 50, dlogz: 1, naccept: 5, check_point_delta_t: 1800, 'print_method': 'interval-10', 'sample': 'acceptance-walk'}"
2323
scheduler:
2424
accounting group: ligo.dev.o4.cbc.pe.bilby
2525
request cpus: 4
2626
request memory: 2.0
27+
cosmology: Planck15_lal
2728
bayeswave:
2829
quality:
2930
state vector:

0 commit comments

Comments (0)