diff --git a/CHANGELOG.md b/CHANGELOG.md index bee6bfd345..39740cc671 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,8 @@ About changelog [here](https://keepachangelog.com/en/1.0.0/) - On managed variants page, print maintainers names instead of emails/ids (#6121) - Enable ClinVar test API URL by default on Scout demo config settings (#6129) - ClinVar settings (API and list of users allowed to send API submissions) are no longer shown only to admin users (#6134) +- Refactor, speedup (dry-run only) and add a progress bar to `scout delete variants` cmd (#6094) +- `scout delete variants` command now accepts an optional `--out-file` where to print a detailed report of the deletion process (#6094) ### Fixed - Comments' text wrapping in ACMG classifications exported as PDF (#6086) - Individual breakpoint gDNA IGV links should not trigger split locus view (#6103) diff --git a/scout/adapter/mongo/query.py b/scout/adapter/mongo/query.py index ffe06796c7..7718b46c85 100644 --- a/scout/adapter/mongo/query.py +++ b/scout/adapter/mongo/query.py @@ -1,7 +1,7 @@ import logging import re from datetime import datetime, timedelta -from typing import List, Optional, Union +from typing import List, Optional, Set, Union from scout.constants import ( CLINSIG_MAP, @@ -52,31 +52,30 @@ def build_case_query( def delete_variants_query( self, case_id: str, - variants_to_keep: List[str] = [], + variants_to_keep: Set[str] | None = None, min_rank_threshold: Optional[int] = None, - keep_ctg: List[str] = [], + remove_ctg: List[str] | None = None, ) -> dict: - """Build a query to delete variants from a case (variant collection). + """Build a query to delete variants from a case. Removes variants with rank lower than `min_rank_threshold`. - Retains variants in categories `keep_ctg` by excluding them from deletion - eg `["cancer", "cancer_sv"]`. + Retains variants in categories `keep_ctg` and specific variant IDs in + `variants_to_keep` by excluding them from deletion. """ - variants_query = {} - case_subquery = {"case_id": case_id} - - # Create query to delete all variants that shouldn't be kept or with rank higher than min_rank_threshold - if variants_to_keep or min_rank_threshold or keep_ctg: - variants_query["$and"] = [case_subquery] - if variants_to_keep: - variants_query["$and"].append({"_id": {"$nin": variants_to_keep}}) - if keep_ctg: - variants_query["$and"].append({"category": {"$nin": keep_ctg}}) - if min_rank_threshold: - variants_query["$and"].append({"rank_score": {"$lt": min_rank_threshold}}) - else: - variants_query = case_subquery - - return variants_query + + query: dict = { + "case_id": case_id, + "$or": [ + {"rank_score": {"$lt": min_rank_threshold}}, + {"rank_score": {"$exists": False}}, + ], + } + if remove_ctg: + query["category"] = {"$in": remove_ctg} + if variants_to_keep: + query["_id"] = {"$nin": variants_to_keep} + + return query def build_variant_query( self, diff --git a/scout/commands/delete/variants.py b/scout/commands/delete/variants.py index cd5d8c06e5..2ff75e3ca7 100644 --- a/scout/commands/delete/variants.py +++ b/scout/commands/delete/variants.py @@ -1,17 +1,19 @@ -from typing import List, Optional, Tuple +import logging +from datetime import datetime, timedelta +from typing import Dict, List, Optional, TextIO, Tuple import click from flask import current_app, url_for from flask.cli import with_appcontext -from scout.adapter.mongo import MongoAdapter +from scout.adapter import MongoAdapter from scout.constants import ANALYSIS_TYPES, CASE_STATUSES, VARIANTS_TARGET_FROM_CATEGORY from scout.server.extensions import store -BYTES_IN_ONE_GIGABYTE = 1073741824 +delete_stats = {"case_counter": 0, "deleted_variant_counter": 0, "deleted_outlier_counter": 0} +LOG = logging.getLogger(__name__) DELETE_VARIANTS_HEADER = [ "Case n.", - "Ncases", "Institute", "Case name", "Case ID", @@ -19,24 +21,206 @@ "Analysis date", "Status", "Research", - "Total variants", - "Removed variants", + "DNA variants", + "Removed DNA variants", + "Removed Outlier variants", ] -VARIANT_CATEGORIES = list(VARIANTS_TARGET_FROM_CATEGORY.keys()) +VARIANT_CTG = sorted(VARIANTS_TARGET_FROM_CATEGORY) +OUTLIERS_CTG = ["outlier"] +TIMESTAMP = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") +OUTFILE = f"variant_cleanup_report_{TIMESTAMP}.tsv" -def _set_keep_ctg(keep_ctg: Tuple[str], rm_ctg: Tuple[str]) -> List[str]: - """Define the categories of variants that should not be removed.""" - if keep_ctg and rm_ctg: - raise click.UsageError("Please use either '--keep-ctg' or '--rm-ctg', not both.") - if keep_ctg: - return list(keep_ctg) - if rm_ctg: - return list(set(VARIANT_CATEGORIES).difference(set(rm_ctg))) - return [] +def handle_delete_variants( + store: MongoAdapter, + remove_ctg: List[str], + outlier_remove_ctg: List[str], + dry_run: bool, + variants_query: dict, +) -> Tuple[int, int]: + """ + Handle variant removal for a case. + + If dry_run is True, do not delete. Also do not count again, as this needlessly takes a long time and does not do exactly the same thing. + Returns a tuple: (number_deleted_variants, number_deleted_omics_variants) + """ + + remove_n_variants = 0 + remove_n_omics_variants = 0 + + if not dry_run: + if remove_ctg: + remove_n_variants = store.variant_collection.delete_many(variants_query).deleted_count + if outlier_remove_ctg: + remove_n_omics_variants = store.omics_variant_collection.delete_many( + variants_query + ).deleted_count + return remove_n_variants, remove_n_omics_variants + + +def _create_delete_variants_event( + user_obj: dict, + case_id: str, + institute_id: str, + display_name: str, + rank_threshold: int, + variants_threshold: Optional[int], +) -> None: + institute_obj = store.institute(institute_id) + with current_app.test_request_context("/cases"): + url = url_for( + "cases.case", + institute_id=institute_obj["_id"], + case_name=display_name, + ) + + threshold_parts: List[str] = [] + threshold_parts.append(f"Rank-score threshold:{rank_threshold}") + if variants_threshold is not None: + threshold_parts.append(f"case n. variants threshold:{variants_threshold}") + content_str = ", ".join(threshold_parts) + + store.remove_variants_event( + institute=institute_obj, + case=store.case(case_id=case_id), + user=user_obj, + link=url, + content=content_str, + ) + + +def _process_cases( + cases: List[dict], + user_obj: Dict, + rank_threshold: int | None, + variants_threshold: int | None, + institute: str | None, + status: str | None, + older_than: int | None, + analysis_type: list | None, + remove_ctg: List[str], + outlier_remove_ctg: List[str], + dry_run: bool, + output_file: TextIO, +) -> None: + """ + First fetches all cases according to user's filters (relatively light query). + Then processes them one by one by calling _process_single_case. + """ + match = {} + if cases: + match["_id"] = {"$in": cases} + if institute: + match["owner"] = institute + if status: + match["status"] = {"$in": list(status)} + if older_than: + older_than_date = datetime.now() - timedelta(weeks=older_than * 4) + match["analysis_date"] = {"$lt": older_than_date} + if analysis_type: + match["individuals.analysis_type"] = {"$in": analysis_type} + projection = { + "_id": 1, + "display_name": 1, + "suspects": 1, + "causatives": 1, + "status": 1, + "owner": 1, + "analysis_date": 1, + "individuals": 1, + } -def get_case_ids(case_file: Optional[str], case_id: List[str]) -> List[str]: + cases_list = list(store.case_collection.find(match, projection)) + total_cases = store.case_collection.count_documents(match) + with click.progressbar(cases_list, length=total_cases, label="Processing cases") as cases_bar: + for case in cases_bar: + _process_single_case( + case=case, + user_obj=user_obj, + rank_threshold=rank_threshold, + variants_threshold=variants_threshold, + remove_ctg=remove_ctg, + outlier_remove_ctg=outlier_remove_ctg, + dry_run=dry_run, + output_file=output_file, + ) + + +def _process_single_case( + case: dict, + user_obj: dict, + rank_threshold: int | None, + variants_threshold: int | None, + remove_ctg: List[str], + outlier_remove_ctg: List[str], + dry_run: bool, + output_file: TextIO, +) -> None: + """ + Process a single case + - Deletes variants (honoring suspects/causatives/evaluated-not-dismissed) + - Updates counters in delete_stats + - Creates events and updates case variant counts" + """ + variant_count = store.variant_collection.count_documents( + {"case_id": case["_id"]} + ) + store.omics_variant_collection.count_documents({"case_id": case["_id"]}) + + if variants_threshold is not None and variant_count < variants_threshold: + return + delete_stats["case_counter"] += 1 + case_evaluated, _ = store.evaluated_variants( + case_id=case["_id"], institute_id=case["owner"], limit_dismissed=5 + ) + evaluated_not_dismissed = [ + variant["_id"] for variant in case_evaluated if "dismiss_variant" not in variant + ] + variants_to_keep = ( + case.get("suspects", []) + case.get("causatives", []) + evaluated_not_dismissed + ) + + variants_query: dict = store.delete_variants_query( + case_id=case["_id"], + variants_to_keep=set(variants_to_keep), + min_rank_threshold=rank_threshold, + remove_ctg=remove_ctg + outlier_remove_ctg, + ) + removed_variants, removed_omics_variants = handle_delete_variants( + store=store, + remove_ctg=list(remove_ctg), + outlier_remove_ctg=outlier_remove_ctg, + dry_run=dry_run, + variants_query=variants_query, + ) + delete_stats["deleted_variant_counter"] += removed_variants + delete_stats["deleted_outlier_counter"] += removed_omics_variants + + if dry_run: + click.echo( + f"{delete_stats['case_counter']}\t{case['owner']}\t{case['display_name']}\t{case['_id']}\t{case.get('track', '')}\t{case.get('analysis_date')}\t{case.get('status', '')}\t{case.get('is_research', '')}\t{variant_count}\tNA\tNA", + file=output_file, + ) + return + + click.echo( + f"{delete_stats['case_counter']}\t{case['owner']}\t{case['display_name']}\t{case['_id']}\t{case.get('track', '')}\t{case.get('analysis_date')}\t{case.get('status', '')}\t{case.get('is_research', '')}\t{variant_count}\t{removed_variants}\t{removed_omics_variants}", + file=output_file, + ) + _create_delete_variants_event( + user_obj=user_obj, + case_id=case["_id"], + institute_id=case["owner"], + display_name=case["display_name"], + rank_threshold=rank_threshold, + variants_threshold=variants_threshold, + ) + + # Update case variant count + store.case_variants_count(case["_id"], case["owner"], True) + + +def get_case_ids(case_file: Optional[str], case_id: Optional[List[str]]) -> List[str]: """Fetch the _id of the cases to remove variants from.""" if case_file and case_id: click.echo( @@ -50,27 +234,32 @@ def get_case_ids(case_file: Optional[str], case_id: List[str]) -> List[str]: ) -def handle_delete_variants( - store: MongoAdapter, keep_ctg: List[str], dry_run: bool, variants_query: dict -) -> Tuple[int]: - """Handle variant removal for a case or count how many variants would be removed if it's a simulation..""" +def _set_remove_ctg( + keep_ctg: Optional[Tuple[str, ...]], + rm_ctg: Optional[Tuple[str, ...]], +) -> tuple[list[str], list[str]]: + """Define the categories of variants that should be removed. + + Returns a tuple (remove_ctg, outlier_remove_ctg). + """ + + if keep_ctg and rm_ctg: + raise click.UsageError("Please use either '--keep-ctg' or '--rm-ctg', not both.") + + if rm_ctg: + remove_ctg = [ctg for ctg in rm_ctg if ctg not in OUTLIERS_CTG] + outlier_remove_ctg = [ctg for ctg in rm_ctg if ctg in OUTLIERS_CTG] + + elif keep_ctg: + keep_set = set(keep_ctg) + remove_ctg = [ctg for ctg in VARIANT_CTG if ctg not in keep_set.union(OUTLIERS_CTG)] + outlier_remove_ctg = [ctg for ctg in OUTLIERS_CTG if ctg not in keep_set] - if dry_run: - remove_n_variants = store.variant_collection.count_documents(variants_query) - remove_n_omics_variants = ( - store.omics_variant_collection.count_documents(variants_query) - if "outlier" not in keep_ctg - else 0 - ) else: - remove_n_variants = store.variant_collection.delete_many(variants_query).deleted_count - remove_n_omics_variants = ( - store.omics_variant_collection.delete_many(variants_query).deleted_count - if "outlier" not in keep_ctg - else 0 - ) + remove_ctg = [ctg for ctg in VARIANT_CTG if ctg not in OUTLIERS_CTG] + outlier_remove_ctg = OUTLIERS_CTG.copy() - return remove_n_variants, remove_n_omics_variants + return remove_ctg, outlier_remove_ctg @click.command("variants", short_help="Delete variants for one or more cases") @@ -90,7 +279,7 @@ def handle_delete_variants( default=[], help="Restrict to cases with specified status", ) -@click.option("--older-than", type=click.INT, default=0, help="Older than (months)") +@click.option("--older-than", type=click.INT, help="Older than (months)") @click.option( "--analysis-type", type=click.Choice(ANALYSIS_TYPES), @@ -101,14 +290,14 @@ def handle_delete_variants( @click.option("--variants-threshold", type=click.INT, help="With more variants than") @click.option( "--rm-ctg", - type=click.Choice(VARIANT_CATEGORIES), + type=click.Choice(VARIANT_CTG), multiple=True, required=False, help="Remove only the following categories", ) @click.option( "--keep-ctg", - type=click.Choice(VARIANT_CATEGORIES), + type=click.Choice(VARIANT_CTG), multiple=True, required=False, help="Keep the following categories", @@ -118,20 +307,27 @@ def handle_delete_variants( is_flag=True, help="Perform a simulation without removing any variant", ) +@click.option( + "--out-file", + type=click.Path(dir_okay=False, writable=True), + default=None, + help="Optional path to save the variant deletion report (TSV)", +) @with_appcontext def variants( user: str, - case_id: tuple, - case_file: str, - institute: str, - status: tuple, - older_than: int, - analysis_type: tuple, rank_threshold: int, - variants_threshold: int, - rm_ctg: tuple, - keep_ctg: tuple, - dry_run: bool, + case_id: Optional[Tuple[str, ...]] = None, + case_file: Optional[str] = None, + institute: Optional[str] = None, + status: Optional[Tuple[str, ...]] = None, + older_than: Optional[int] = None, + analysis_type: Optional[Tuple[str, ...]] = None, + variants_threshold: Optional[int] = None, + rm_ctg: Optional[Tuple[str, ...]] = None, + keep_ctg: Optional[Tuple[str, ...]] = None, + dry_run: bool = False, + out_file: Optional[str] = None, ) -> None: """Delete variants for one or more cases""" @@ -141,104 +337,37 @@ def variants( return case_ids = get_case_ids(case_file=case_file, case_id=case_id) + if out_file is None: + out_file = OUTFILE - total_deleted = 0 - - if dry_run: - click.echo("--------------- DRY RUN COMMAND ---------------") - items_name = "estimated deleted variants" - else: - click.confirm("Variants are going to be deleted from database. Continue?", abort=True) - items_name = "deleted variants" - - case_query = store.build_case_query( - case_ids=case_ids, - institute_id=institute, - status=status, - older_than=older_than, - analysis_type=analysis_type, - ) - # Estimate the average size of a variant document in database - avg_var_size = store.collection_stats("variant").get("avgObjSize", 0) # in bytes - - # Get all cases where case_query applies - n_cases = store.case_collection.count_documents(case_query) - cases = store.cases(query=case_query) - filters = ( - f"Rank-score threshold:{rank_threshold}, case n. variants threshold:{variants_threshold}." - ) - click.echo("\t".join(DELETE_VARIANTS_HEADER)) - keep_ctg = _set_keep_ctg(keep_ctg=keep_ctg, rm_ctg=rm_ctg) - - for nr, case in enumerate(cases, 1): - - case_id = case["_id"] - institute_id = case["owner"] - case_n_variants = store.variant_collection.count_documents( - {"case_id": case_id} - ) + store.omics_variant_collection.count_documents({"case_id": case_id}) - # Skip case if user provided a number of variants to keep and this number is less than total number of case variants - if variants_threshold and case_n_variants < variants_threshold: - continue - # Get evaluated variants for the case that haven't been dismissed - case_evaluated, _ = store.evaluated_variants(case_id=case_id, institute_id=institute_id) - evaluated_not_dismissed = [ - variant["_id"] for variant in case_evaluated if "dismiss_variant" not in variant - ] - # Do not remove variants that are either pinned, causative or evaluated not dismissed - variants_to_keep = ( - case.get("suspects", []) + case.get("causatives", []) + evaluated_not_dismissed or [] - ) - - variants_query: dict = store.delete_variants_query( - case_id, variants_to_keep, rank_threshold, keep_ctg - ) - - remove_n_variants, remove_n_omics_variants = handle_delete_variants( - store=store, keep_ctg=keep_ctg, dry_run=dry_run, variants_query=variants_query - ) - total_deleted += remove_n_variants + remove_n_omics_variants - click.echo( - "\t".join( - [ - str(nr), - str(n_cases), - case["owner"], - case["display_name"], - case_id, - case.get("track", ""), - str(case["analysis_date"]), - case.get("status", ""), - str(case.get("is_research", "")), - str(case_n_variants), - str(remove_n_variants + remove_n_omics_variants), - ] + with open(out_file, "w") as output_file: + if dry_run: + click.echo("--------------- DRY RUN COMMAND ---------------") + items_name = "estimated deleted variants" + else: + click.confirm( + "Variants are going to be deleted from database. Continue?", + abort=True, ) - ) - - if dry_run: # Do not create an associated event - continue + items_name = "deleted variants" - # Create event in database - institute_obj = store.institute(case["owner"]) - with current_app.test_request_context("/cases"): - url = url_for( - "cases.case", - institute_id=institute_obj["_id"], - case_name=case["display_name"], - ) - store.remove_variants_event( - institute=institute_obj, - case=case, - user=user_obj, - link=url, - content=filters, + remove_ctg, outlier_remove_ctg = _set_remove_ctg(keep_ctg=keep_ctg, rm_ctg=rm_ctg) + click.echo("\t".join(DELETE_VARIANTS_HEADER), file=output_file) + _process_cases( + cases=case_ids, + user_obj=user_obj, + rank_threshold=rank_threshold, + variants_threshold=variants_threshold, + institute=institute, + status=status, + older_than=older_than, + analysis_type=analysis_type, + remove_ctg=remove_ctg, + outlier_remove_ctg=outlier_remove_ctg, + dry_run=dry_run, + output_file=output_file, + ) + if not dry_run: + click.echo( + f"Total {items_name}: {delete_stats['deleted_variant_counter'] + delete_stats['deleted_outlier_counter']}", ) - - # Update case variants count - store.case_variants_count(case_id, institute_obj["_id"], True) - - click.echo(f"Total {items_name}: {total_deleted}") - click.echo( - f"Estimated space freed (GB): {round((total_deleted * avg_var_size) / BYTES_IN_ONE_GIGABYTE, 4)}" - ) diff --git a/tests/commands/delete/test_delete_variants.py b/tests/commands/delete/test_delete_variants.py index 80d8549769..6b1594e742 100644 --- a/tests/commands/delete/test_delete_variants.py +++ b/tests/commands/delete/test_delete_variants.py @@ -1,17 +1,18 @@ from scout.commands import cli +from scout.commands.delete.variants import DELETE_VARIANTS_HEADER from scout.server.extensions import store VARIANTS_QUERY = {"rank_score": {"$lt": 0}} -RANK_THRESHOLD = 0 +RANK_THRESHOLD = 10 VARIANTS_THRESHOLD = 10 -def test_delete_variants_dry_run(mock_app, case_obj, user_obj): +def test_delete_variants_dry_run(mock_app, case_obj, user_obj, tmp_path): """test command for cleaning variants collection - simulate deletion""" assert store.user_collection.find_one() - # Given a database with SNV variants + # GIVEN a database with SNV variants runner = mock_app.test_cli_runner() result = runner.invoke( cli, ["load", "variants", case_obj["_id"], "--snv", "--rank-threshold", 5] @@ -19,7 +20,9 @@ def test_delete_variants_dry_run(mock_app, case_obj, user_obj): assert result.exit_code == 0 n_initial_vars = sum(1 for _ in store.variant_collection.find()) - # Then the function that delete variants in dry run should run without error + # GIVEN a temp file path + out_file = tmp_path / "variant_report.tsv" + cmd_params = [ "delete", "variants", @@ -38,26 +41,38 @@ def test_delete_variants_dry_run(mock_app, case_obj, user_obj): "--keep-ctg", "str", "--dry-run", + "--out-file", + str(out_file), ] + + # THEN the function that delete variants in dry run should run without error result = runner.invoke(cli, cmd_params) assert result.exit_code == 0 - assert "estimated deleted variants" in result.output - assert "Estimated space freed" in result.output + + # THEN outfile should be populated + file_content = out_file.read_text() + lines = [line for line in file_content.splitlines() if line.strip()] + assert len(lines) > 1 + assert lines[0] == "\t".join(DELETE_VARIANTS_HEADER) # And no variants should be deleted assert sum(1 for _ in store.variant_collection.find()) == n_initial_vars -def test_delete_variants(mock_app, case_obj, user_obj): +def test_delete_variants(mock_app, case_obj, user_obj, tmp_path): """Test deleting variants using the delete variants command line""" - # Given a case with with SNV variants + # GIVEN a case with with SNV variants runner = mock_app.test_cli_runner() result = runner.invoke( cli, ["load", "variants", "--snv", "--rank-threshold", 0, case_obj["_id"]] ) assert result.exit_code == 0 nr_snvs = sum(1 for _ in store.variant_collection.find()) + assert nr_snvs + + # GIVEN a temp file path + out_file = tmp_path / "variant_report.tsv" # AND WTS outliers result = runner.invoke( @@ -65,6 +80,8 @@ def test_delete_variants(mock_app, case_obj, user_obj): ) assert result.exit_code == 0 nr_outliers = sum(1 for _ in store.omics_variant_collection.find()) + assert nr_outliers + assert nr_outliers n_initial_vars = nr_snvs + nr_outliers @@ -78,6 +95,8 @@ def test_delete_variants(mock_app, case_obj, user_obj): "inactive", "--keep-ctg", "outlier", + "--keep-ctg", + "outlier", "--older-than", 2, "--analysis-type", @@ -86,12 +105,19 @@ def test_delete_variants(mock_app, case_obj, user_obj): RANK_THRESHOLD, "--variants-threshold", VARIANTS_THRESHOLD, + "--out-file", + str(out_file), ] result = runner.invoke(cli, cmd_params, input="y") assert result.exit_code == 0 - assert "estimated deleted variants" not in result.output - assert "Estimated space freed" in result.output + + # THEN outfile should be populated + file_content = out_file.read_text() + lines = [line for line in file_content.splitlines() if line.strip()] + assert len(lines) > 1 + assert lines[0] == "\t".join(DELETE_VARIANTS_HEADER) + # variants should be deleted n_current_vars = sum(1 for _ in store.variant_collection.find()) assert n_current_vars < n_initial_vars @@ -101,15 +127,17 @@ def test_delete_variants(mock_app, case_obj, user_obj): assert event["case"] == case_obj["_id"] assert ( event["content"] - == f"Rank-score threshold:0, case n. variants threshold:{VARIANTS_THRESHOLD}." + == f"Rank-score threshold:{RANK_THRESHOLD}, case n. variants threshold:{VARIANTS_THRESHOLD}" ) - # SNV variants should be gone - assert sum(1 for _ in store.variant_collection.find()) == 0 + # SNV variants with rank score < RANK_THRESHOLD should be gone + for var in store.variant_collection.find(): + assert var["rank_score"] >= 0 + # WHILE outliers should still be available assert sum(1 for _ in store.omics_variant_collection.find()) == nr_outliers -def test_delete_outlier_variants(mock_app, case_obj, user_obj): +def test_delete_outlier_variants(mock_app, case_obj, user_obj, tmp_path): """Test the delete variants command's ability to remove omics variants.""" # Given a case with with (research) outlier variants @@ -123,6 +151,9 @@ def test_delete_outlier_variants(mock_app, case_obj, user_obj): n_variants_to_delete = store.omics_variant_collection.count_documents({}) assert n_variants_to_delete + # GIVEN a temp file path + out_file = tmp_path / "variant_report.tsv" + # WHEN variants are removed using the command line cmd_params = [ "delete", @@ -130,20 +161,27 @@ def test_delete_outlier_variants(mock_app, case_obj, user_obj): "-u", user_obj["email"], "--rank-threshold", - 0, + RANK_THRESHOLD, "--rm-ctg", "outlier", + "--out-file", + str(out_file), ] result = runner.invoke(cli, cmd_params, input="y") assert result.exit_code == 0 - assert "estimated deleted variants" not in result.output - assert "Estimated space freed" in result.output + + # THEN outfile should be populated + file_content = out_file.read_text() + lines = [line for line in file_content.splitlines() if line.strip()] + assert len(lines) > 1 + assert lines[0] == "\t".join(DELETE_VARIANTS_HEADER) # THEN the variants should be gone - n_current_vars = sum(1 for _ in store.variant_collection.find()) + n_current_vars = sum(1 for _ in store.omics_variant_collection.find()) + assert n_current_vars == 0 assert n_current_vars + n_variants_to_delete == n_initial_vars # and a relative event should be created event = store.event_collection.find_one({"verb": "remove_variants"}) assert event["case"] == case_obj["_id"] - assert "Rank-score threshold:0" in event["content"] + assert f"Rank-score threshold:{RANK_THRESHOLD}" in event["content"]