Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ Try to use the following format:
### Fixed
- Add scoring normalisation for flag lookup mode ([#177](https://github.com/Clinical-Genomics/genmod/pull/177))
- Fix crash on test for missing annotation key for phased mode ([#178](https://github.com/Clinical-Genomics/genmod/pull/178))
- Fix deadlock in compound scoring and improve error logging ([#180](https://github.com/Clinical-Genomics/genmod/pull/180))
### Changed
- Add option to allow rescoring of (compound) variants ([#180](https://github.com/Clinical-Genomics/genmod/pull/180))

## [3.10.1]
### Fixed
Expand Down
12 changes: 12 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,17 @@ test_dist: docker-build
pip3 install dist/genmod*.tar.gz && \
uv run genmod -v annotate --annotate-regions --genome-build 37 examples/test_vcf.vcf"

# Run only the tests matching `test_rescore_with_annotation_suffix` inside the
# genmod/test image, after the docker-build prerequisite has run.
# The trailing arguments (-v -s -o log_cli=true -k ...) are passed to the
# container; they look like pytest options, so presumably the image's
# entrypoint is pytest -- confirm against the Dockerfile.
test_rescore: docker-build
$(DOCKER) run -i -l genmod-test genmod/test -v -s -o log_cli=true -k test_rescore_with_annotation_suffix 2>&1

# Run only the tests matching `test_mivmir_minimal_score_config` inside the
# genmod/test image, after the docker-build prerequisite has run.
# The trailing arguments (-v -s -o log_cli=true -k ...) are passed to the
# container; presumably the image's entrypoint is pytest -- confirm against
# the Dockerfile.
test_mivmir: docker-build
$(DOCKER) run -i -l genmod-test genmod/test -v -s -o log_cli=true -k test_mivmir_minimal_score_config 2>&1

# Build the genmod/hasta image from ./Dockerfile, export it to genmod.tar,
# convert that archive into a Singularity image (genmod.sif), and remove the
# intermediate tarball.
# NOTE(review): the build step goes through $(DOCKER) while the save step calls
# `docker` directly -- confirm this is intended when $(DOCKER) is not docker.
build-export-singularity:
$(DOCKER) build -t genmod/hasta --force-rm=true --rm=true -f Dockerfile .
docker save genmod/hasta -o genmod.tar
singularity build -F genmod.sif docker-archive://genmod.tar
rm genmod.tar

# Clean up unused Docker resources by running `docker system prune`.
docker-clean-images:
docker system prune
44 changes: 39 additions & 5 deletions genmod/commands/score_compounds.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@
import sys
from codecs import open
from datetime import datetime
from multiprocessing import JoinableQueue, Manager, cpu_count, util
from multiprocessing import JoinableQueue, Manager, cpu_count, log_to_stderr, util
from tempfile import NamedTemporaryFile
from time import sleep

import click

Expand All @@ -29,7 +30,8 @@

from .utils import get_file_handle, outfile, processes, silent, temp_dir, variant_file

logger = logging.getLogger(__name__)
logger = log_to_stderr(logging.INFO)
logging.basicConfig(stream=sys.stderr, force=True)
util.abstract_sockets_supported = False


Expand All @@ -49,9 +51,24 @@
default=9,
)
@click.option("--penalty", type=int, help="Penalty applied together with --threshold", default=6)
@click.option(
"-s",
"--annotation_suffix",
default=None,
help="Target score with SUFFIX and append suffix to compound INFO fields (to not overwrite existing compound score entries).",
)
@click.pass_context
def compound(
context, variant_file, silent, outfile, vep, threshold: int, penalty: int, processes, temp_dir
context,
variant_file,
silent,
outfile,
vep,
threshold: int,
penalty: int,
annotation_suffix: str,
processes,
temp_dir,
):
"""
Score compound variants in a vcf file based on their rank score.
Expand All @@ -75,6 +92,13 @@ def compound(
else:
break

# Setup INFO field name suffix
if annotation_suffix is None:
annotation_suffix: str = "" # i.e. add no suffix to INFO field name
else:
annotation_suffix: str = f"{annotation_suffix}"
logger.debug(f"Adding scoring suffix: {annotation_suffix}")

logger.info("Headers parsed")

if not line.startswith("#"):
Expand All @@ -88,7 +112,7 @@ def compound(
add_metadata(
head,
"info",
"CompoundsNormalized",
"CompoundsNormalized" + annotation_suffix,
annotation_number=".",
entry_type="String",
description="Rank score as provided by compound analysis, based on RankScoreNormalized. family_id:rank_score",
Expand Down Expand Up @@ -119,6 +143,7 @@ def compound(
individuals=individuals,
threshold=threshold,
penalty=penalty,
annotation_suffix=annotation_suffix,
)
for i in range(num_scorers)
]
Expand Down Expand Up @@ -160,6 +185,15 @@ def compound(
for i in range(num_scorers):
variant_queue.put(None)

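# Before joining on variant_queue, poll the workers until they have all exited
# and raise as soon as one is found dead with a non-zero exit code. A failed
# worker leaves its queue items unacknowledged, so joining the queue directly
# would deadlock the main process.
# NOTE(review): exit codes are only inspected inside the loop; workers that die
# between the last check and the loop condition are never inspected -- consider
# re-checking all exit codes once the loop has finished.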
while any([worker.is_alive() for worker in compound_scorers]):
sleep(1) # Don't churn CPU
for worker in compound_scorers:
if not worker.is_alive() and worker.exitcode != 0:
raise RuntimeError(f"Worker {worker} failed")
logger.debug(f"Worker {worker} alive")

variant_queue.join()
results.put(None)
variant_printer.join()
Expand All @@ -172,7 +206,7 @@ def compound(
for line in f:
print_variant(variant_line=line, outfile=outfile, mode="modified", silent=silent)
except Exception as e:
logger.warning(e)
logger.error(e)
for worker in compound_scorers:
worker.terminate()
variant_printer.terminate()
Expand Down
38 changes: 30 additions & 8 deletions genmod/commands/score_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,17 @@
@click.option(
"-c", "--score_config", type=click.Path(exists=True), help="The plug-in config file(.ini)"
)
@click.option(
"--skip_is_previously_scored_check",
is_flag=True,
help="Allow rescoring of previously scored VCF",
)
@click.option(
"-s",
"--annotation_suffix",
default=None,
help="Append suffix to all INFO fields related to scoring (to not overwrite existing entries).",
)
@click.pass_context
def score(
context,
Expand All @@ -77,6 +88,8 @@ def score(
silent,
skip_plugin_check,
rank_results,
skip_is_previously_scored_check,
annotation_suffix,
outfile,
):
"""
Expand Down Expand Up @@ -137,6 +150,13 @@ def score(
else:
logger.info("All plugins are defined in vcf")

# Setup INFO field name suffix
if annotation_suffix is None:
annotation_suffix: str = "" # i.e. add no suffix to INFO field name
else:
annotation_suffix: str = f"{annotation_suffix}"
logger.debug(f"Adding scoring suffix: {annotation_suffix}")

csq_format = head.vep_columns
# Add the first variant to the iterator
if not line.startswith("#"):
Expand All @@ -147,7 +167,7 @@ def score(

header_line = head.header

if "RankScore" in head.info_dict:
if "RankScore" in head.info_dict and not skip_is_previously_scored_check:
logger.warning("Variants already scored according to VCF header")
logger.info("Please check VCF file")
context.abort()
Expand All @@ -156,7 +176,7 @@ def score(
add_metadata(
head,
"info",
rank_score_type,
rank_score_type + annotation_suffix,
annotation_number=".",
entry_type="String",
description=rank_score_description,
Expand All @@ -165,7 +185,7 @@ def score(
add_metadata(
head,
"info",
"RankScoreMinMax",
f"RankScoreMinMax{annotation_suffix}",
annotation_number=".",
entry_type="String",
description="The rank score MIN-MAX bounds. family_id:min:max.",
Expand All @@ -175,7 +195,7 @@ def score(
add_metadata(
head,
"info",
"RankResult",
"RankResult" + annotation_suffix,
annotation_number=".",
entry_type="String",
description="|".join(score_categories),
Expand Down Expand Up @@ -221,19 +241,19 @@ def score(
)

variant = add_vcf_info(
keyword="RankScore",
keyword="RankScore" + annotation_suffix,
variant_dict=variant,
annotation="{0}:{1}".format(family_id, rank_score),
)

variant: dict = add_vcf_info(
keyword="RankScoreNormalized",
keyword=f"RankScoreNormalized{annotation_suffix}",
variant_dict=variant,
annotation="{0}:{1}".format(family_id, rank_score_normalized),
)

variant: dict = add_vcf_info(
keyword="RankScoreMinMax",
keyword=f"RankScoreMinMax{annotation_suffix}",
variant_dict=variant,
annotation="{0}:{1}:{2}".format(
family_id, category_scores_min, category_scores_max
Expand All @@ -242,7 +262,9 @@ def score(

if rank_results:
variant = add_vcf_info(
keyword="RankResult", variant_dict=variant, annotation="|".join(category_scores)
keyword="RankResult" + annotation_suffix,
variant_dict=variant,
annotation="|".join(category_scores),
)

print_variant(
Expand Down
Loading
Loading