Clinical-Genomics · torbjorgen · Jun 26, 2025 · Jul 3, 2025 · Jul 1, 2025 · Jul 2, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -12,6 +12,9 @@ Try to use the following format:
 ### Fixed 
 - Add scoring normalisation for flag lookup mode ([#177](https://github.com/Clinical-Genomics/genmod/pull/177))
 - Fix crash on test for missing annotation key for phased mode ([#178](https://github.com/Clinical-Genomics/genmod/pull/178))
+- Fix deadlock in compound scoring and improve error logging ([#180](https://github.com/Clinical-Genomics/genmod/pull/180))
+### Changed
+- Add option to allow rescoring of (compound) variants ([#180](https://github.com/Clinical-Genomics/genmod/pull/180))
 
 ## [3.10.1]
 ### Fixed

diff --git a/Makefile b/Makefile
@@ -20,5 +20,17 @@ test_dist: docker-build
 		pip3 install dist/genmod*.tar.gz && \
 		uv run genmod -v annotate --annotate-regions --genome-build 37 examples/test_vcf.vcf"
 
+test_rescore: docker-build
+	$(DOCKER) run -i -l genmod-test genmod/test -v -s -o log_cli=true -k test_rescore_with_annotation_suffix 2>&1
+
+test_mivmir: docker-build
+	$(DOCKER) run -i -l genmod-test genmod/test -v -s -o log_cli=true -k test_mivmir_minimal_score_config 2>&1
+
+build-export-singularity:
+	$(DOCKER) build -t genmod/hasta --force-rm=true --rm=true -f Dockerfile .
+	docker save genmod/hasta -o genmod.tar
+	singularity build -F genmod.sif docker-archive://genmod.tar
+	rm genmod.tar
+
 docker-clean-images:
 	docker system prune
diff --git a/genmod/commands/score_compounds.py b/genmod/commands/score_compounds.py
@@ -17,8 +17,9 @@
 import sys
 from codecs import open
 from datetime import datetime
-from multiprocessing import JoinableQueue, Manager, cpu_count, util
+from multiprocessing import JoinableQueue, Manager, cpu_count, log_to_stderr, util
 from tempfile import NamedTemporaryFile
+from time import sleep
 
 import click
 
@@ -29,7 +30,8 @@
 
 from .utils import get_file_handle, outfile, processes, silent, temp_dir, variant_file
 
-logger = logging.getLogger(__name__)
+logger = log_to_stderr(logging.INFO)
+logging.basicConfig(stream=sys.stderr, force=True)
 util.abstract_sockets_supported = False
 
 
@@ -49,9 +51,24 @@
     default=9,
 )
 @click.option("--penalty", type=int, help="Penalty applied together with --threshold", default=6)
+@click.option(
+    "-s",
+    "--annotation_suffix",
+    default=None,
+    help="Target score with SUFFIX and append suffix to compound INFO fields (to not overwrite existing compound score entries).",
+)
 @click.pass_context
 def compound(
-    context, variant_file, silent, outfile, vep, threshold: int, penalty: int, processes, temp_dir
+    context,
+    variant_file,
+    silent,
+    outfile,
+    vep,
+    threshold: int,
+    penalty: int,
+    annotation_suffix: str,
+    processes,
+    temp_dir,
 ):
     """
     Score compound variants in a vcf file based on their rank score.
@@ -75,6 +92,13 @@ def compound(
         else:
             break
 
+    # Setup INFO field name suffix
+    if annotation_suffix is None:
+        annotation_suffix: str = ""  # i.e. add no suffix to INFO field name
+    else:
+        annotation_suffix: str = f"{annotation_suffix}"
+        logger.debug(f"Adding scoring suffix: {annotation_suffix}")
+
     logger.info("Headers parsed")
 
     if not line.startswith("#"):
@@ -88,7 +112,7 @@ def compound(
     add_metadata(
         head,
         "info",
-        "CompoundsNormalized",
+        "CompoundsNormalized" + annotation_suffix,
         annotation_number=".",
         entry_type="String",
         description="Rank score as provided by compound analysis, based on RankScoreNormalized. family_id:rank_score",
@@ -119,6 +143,7 @@ def compound(
             individuals=individuals,
             threshold=threshold,
             penalty=penalty,
+            annotation_suffix=annotation_suffix,
         )
         for i in range(num_scorers)
     ]
@@ -160,6 +185,15 @@ def compound(
         for i in range(num_scorers):
             variant_queue.put(None)
 
+        # Before joining on variant_queue, check whether workers have completed
+        # or failed, to avoid main process deadlock on never-decreasing queue semaphore.
+        while any([worker.is_alive() for worker in compound_scorers]):
+            sleep(1)  # Don't churn CPU
+            for worker in compound_scorers:
+                if not worker.is_alive() and worker.exitcode != 0:
+                    raise RuntimeError(f"Worker {worker} failed")
+                logger.debug(f"Worker {worker} alive")
+
         variant_queue.join()
         results.put(None)
         variant_printer.join()
@@ -172,7 +206,7 @@ def compound(
             for line in f:
                 print_variant(variant_line=line, outfile=outfile, mode="modified", silent=silent)
     except Exception as e:
-        logger.warning(e)
+        logger.error(e)
         for worker in compound_scorers:
             worker.terminate()
         variant_printer.terminate()

diff --git a/genmod/commands/score_variants.py b/genmod/commands/score_variants.py
@@ -66,6 +66,17 @@
 @click.option(
     "-c", "--score_config", type=click.Path(exists=True), help="The plug-in config file(.ini)"
 )
+@click.option(
+    "--skip_is_previously_scored_check",
+    is_flag=True,
+    help="Allow rescoring of previously scored VCF",
+)
+@click.option(
+    "-s",
+    "--annotation_suffix",
+    default=None,
+    help="Append suffix to all INFO fields related to scoring (to not overwrite existing entries).",
+)
 @click.pass_context
 def score(
     context,
@@ -77,6 +88,8 @@ def score(
     silent,
     skip_plugin_check,
     rank_results,
+    skip_is_previously_scored_check,
+    annotation_suffix,
     outfile,
 ):
     """
@@ -137,6 +150,13 @@ def score(
     else:
         logger.info("All plugins are defined in vcf")
 
+    # Setup INFO field name suffix
+    if annotation_suffix is None:
+        annotation_suffix: str = ""  # i.e. add no suffix to INFO field name
+    else:
+        annotation_suffix: str = f"{annotation_suffix}"
+        logger.debug(f"Adding scoring suffix: {annotation_suffix}")
+
     csq_format = head.vep_columns
     # Add the first variant to the iterator
     if not line.startswith("#"):
@@ -147,7 +167,7 @@ def score(
 
     header_line = head.header
 
-    if "RankScore" in head.info_dict:
+    if "RankScore" in head.info_dict and not skip_is_previously_scored_check:
         logger.warning("Variants already scored according to VCF header")
         logger.info("Please check VCF file")
         context.abort()
@@ -156,7 +176,7 @@ def score(
         add_metadata(
             head,
             "info",
-            rank_score_type,
+            rank_score_type + annotation_suffix,
             annotation_number=".",
             entry_type="String",
             description=rank_score_description,
@@ -165,7 +185,7 @@ def score(
     add_metadata(
         head,
         "info",
-        "RankScoreMinMax",
+        f"RankScoreMinMax{annotation_suffix}",
         annotation_number=".",
         entry_type="String",
         description="The rank score MIN-MAX bounds. family_id:min:max.",
@@ -175,7 +195,7 @@ def score(
         add_metadata(
             head,
             "info",
-            "RankResult",
+            "RankResult" + annotation_suffix,
             annotation_number=".",
             entry_type="String",
             description="|".join(score_categories),
@@ -221,19 +241,19 @@ def score(
             )
 
             variant = add_vcf_info(
-                keyword="RankScore",
+                keyword="RankScore" + annotation_suffix,
                 variant_dict=variant,
                 annotation="{0}:{1}".format(family_id, rank_score),
             )
 
             variant: dict = add_vcf_info(
-                keyword="RankScoreNormalized",
+                keyword=f"RankScoreNormalized{annotation_suffix}",
                 variant_dict=variant,
                 annotation="{0}:{1}".format(family_id, rank_score_normalized),
             )
 
             variant: dict = add_vcf_info(
-                keyword="RankScoreMinMax",
+                keyword=f"RankScoreMinMax{annotation_suffix}",
                 variant_dict=variant,
                 annotation="{0}:{1}:{2}".format(
                     family_id, category_scores_min, category_scores_max
@@ -242,7 +262,9 @@ def score(
 
             if rank_results:
                 variant = add_vcf_info(
-                    keyword="RankResult", variant_dict=variant, annotation="|".join(category_scores)
+                    keyword="RankResult" + annotation_suffix,
+                    variant_dict=variant,
+                    annotation="|".join(category_scores),
                 )
 
             print_variant(