Skip to content

Commit 0133711

Browse files
author
Chiara Rasi
committed
Add a bunch of optionals, move tests to dedicated file
1 parent 6d253f1 commit 0133711

File tree

3 files changed

+205
-188
lines changed

3 files changed

+205
-188
lines changed

scout/commands/delete/variants.py

Lines changed: 59 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -47,95 +47,109 @@ def _log_case(cases: List[Dict], cid: str, deleted_n: int) -> None:
4747

4848

4949
def _process_batch(
50-
cases: List[dict],
51-
user_obj: dict,
52-
rank_threshold: int,
53-
variants_threshold: int,
50+
cases: List[Dict[str, Any]],
51+
user_obj: Dict[str, Any],
52+
rank_threshold: int | None,
53+
variants_threshold: int | None,
5454
keep_ctg: Iterable[str],
5555
dry_run: bool,
5656
) -> int:
57-
"""Process a batch of cases by performing bulk deletion of variants.
58-
Returns total number of deleted (or estimated deleted) variants.
5957
"""
60-
total_deleted = 0
61-
case_ids = [case["_id"] for case in cases]
58+
Process a batch of cases by performing bulk deletion of variants.
59+
60+
Creates events with threshold info, respecting optional thresholds.
61+
"""
6262

63-
# ---- OPTIONAL: pre-count variants for threshold ----
64-
counts = {}
65-
if variants_threshold:
63+
total_deleted: int = 0
64+
case_ids: List[Any] = [case["_id"] for case in cases]
65+
66+
# Optional variant count pre-computation
67+
counts: Dict[Any, int] = {}
68+
if variants_threshold is not None:
6669
pipeline = [
6770
{"$match": {"case_id": {"$in": case_ids}}},
6871
{"$group": {"_id": "$case_id", "n": {"$sum": 1}}},
6972
]
70-
7173
for doc in store.variant_collection.aggregate(pipeline, allowDiskUse=True):
7274
counts[doc["_id"]] = doc["n"]
7375

74-
delete_ops = []
75-
case_delete_map = {}
76+
delete_ops: List[DeleteMany] = []
77+
case_delete_map: Dict[Any, Dict[str, Any]] = {}
7678

7779
for case in cases:
7880
cid = case["_id"]
7981

80-
if variants_threshold and counts.get(cid, 0) < variants_threshold:
82+
# Skip case if variants_threshold is set and the case has fewer variants than the threshold
83+
if variants_threshold is not None and counts.get(cid, 0) < variants_threshold:
8184
continue
8285

83-
# Positive delete filter only
84-
delete_query = {
86+
delete_query: Dict[str, Any] = {
8587
"case_id": cid,
86-
"rank_score": {"$lt": rank_threshold},
87-
"category": {"$nin": keep_ctg},
88+
"category": {"$nin": list(keep_ctg)},
8889
}
90+
if rank_threshold is not None:
91+
delete_query["rank_score"] = {"$lt": rank_threshold}
8992

9093
delete_ops.append(DeleteMany(delete_query))
9194
case_delete_map[cid] = delete_query
9295

9396
if not delete_ops:
9497
return 0
9598

96-
# ---- DRY RUN ----
99+
# Dry-run mode
97100
if dry_run:
98101
for cid, query in case_delete_map.items():
99-
n = store.variant_collection.count_documents(query)
102+
n: int = store.variant_collection.count_documents(query)
100103
total_deleted += n
101-
_log_case(cases, cid, n)
102104
return total_deleted
103105

104-
# ---- REAL DELETE ----
105-
result = store.variant_collection.bulk_write(delete_ops, ordered=False)
106-
106+
# Execute bulk delete
107+
result: BulkWriteResult = store.variant_collection.bulk_write(
108+
delete_ops,
109+
ordered=False,
110+
)
107111
total_deleted += result.deleted_count
108112

109-
# ---- Post-delete: events + count update ----
113+
# Post-delete operations: events + variant count updates
110114
for case in cases:
111115
cid = case["_id"]
112-
113116
if cid not in case_delete_map:
114117
continue
115118

116-
institute_obj = store.institute(case["owner"])
119+
institute_obj: Dict[str, Any] = store.institute(case["owner"])
117120

118121
with current_app.test_request_context("/cases"):
119-
url = url_for(
122+
url: str = url_for(
120123
"cases.case",
121124
institute_id=institute_obj["_id"],
122125
case_name=case["display_name"],
123126
)
124127

128+
# -----------------------------
129+
# Build event content string
130+
# -----------------------------
131+
threshold_parts: List[str] = []
132+
if rank_threshold is not None:
133+
threshold_parts.append(f"Rank-score threshold:{rank_threshold}")
134+
if variants_threshold is not None:
135+
threshold_parts.append(f"case n. variants threshold:{variants_threshold}")
136+
137+
content_str: str = ", ".join(threshold_parts)
138+
125139
store.remove_variants_event(
126140
institute=institute_obj,
127141
case=case,
128142
user=user_obj,
129143
link=url,
130-
content=f"Rank-score threshold:{rank_threshold}",
144+
content=content_str,
131145
)
132146

133147
store.case_variants_count(cid, institute_obj["_id"], True)
134148

135149
return total_deleted
136150

137151

138-
def get_case_ids(case_file: Optional[str], case_id: List[str]) -> List[str]:
152+
def get_case_ids(case_file: Optional[str], case_id: Optional[List[str]]) -> List[str]:
139153
"""Fetch the _id of the cases to remove variants from."""
140154
if case_file and case_id:
141155
click.echo(
@@ -149,7 +163,9 @@ def get_case_ids(case_file: Optional[str], case_id: List[str]) -> List[str]:
149163
)
150164

151165

152-
def _set_keep_ctg(keep_ctg: Tuple[str], rm_ctg: Tuple[str]) -> List[str]:
166+
def _set_keep_ctg(
167+
keep_ctg: Optional[Tuple[str, ...]], rm_ctg: Optional[Tuple[str, ...]]
168+
) -> List[str]:
153169
"""Define the categories of variants that should not be removed."""
154170
if keep_ctg and rm_ctg:
155171
raise click.UsageError("Please use either '--keep-ctg' or '--rm-ctg', not both.")
@@ -208,17 +224,17 @@ def _set_keep_ctg(keep_ctg: Tuple[str], rm_ctg: Tuple[str]) -> List[str]:
208224
@with_appcontext
209225
def variants(
210226
user: str,
211-
case_id: tuple,
212-
case_file: str,
213-
institute: str,
214-
status: tuple,
215-
older_than: int,
216-
analysis_type: tuple,
217-
rank_threshold: int,
218-
variants_threshold: int,
219-
rm_ctg: tuple,
220-
keep_ctg: tuple,
221-
dry_run: bool,
227+
case_id: Optional[Tuple[str, ...]] = None,
228+
case_file: Optional[str] = None,
229+
institute: Optional[str] = None,
230+
status: Optional[Tuple[str, ...]] = None,
231+
older_than: Optional[int] = None,
232+
analysis_type: Optional[Tuple[str, ...]] = None,
233+
rank_threshold: Optional[int] = None,
234+
variants_threshold: Optional[int] = None,
235+
rm_ctg: Optional[Tuple[str, ...]] = None,
236+
keep_ctg: Optional[Tuple[str, ...]] = None,
237+
dry_run: bool = False,
222238
) -> None:
223239
"""Delete variants for one or more cases"""
224240

tests/commands/delete/test_delete_cmd.py

Lines changed: 0 additions & 145 deletions
Original file line numberDiff line numberDiff line change
@@ -3,151 +3,6 @@
33
from scout.commands.delete.delete_command import CASE_RNA_KEYS
44
from scout.server.extensions import store
55

6-
VARIANTS_QUERY = {"rank_score": {"$lt": 0}}
7-
RANK_THRESHOLD = 0
8-
VARIANTS_THRESHOLD = 10
9-
10-
11-
def test_delete_variants_dry_run(mock_app, case_obj, user_obj):
12-
"""test command for cleaning variants collection - simulate deletion"""
13-
14-
assert store.user_collection.find_one()
15-
16-
# Given a database with SNV variants
17-
runner = mock_app.test_cli_runner()
18-
result = runner.invoke(
19-
cli, ["load", "variants", case_obj["_id"], "--snv", "--rank-threshold", 5]
20-
)
21-
assert result.exit_code == 0
22-
n_initial_vars = sum(1 for _ in store.variant_collection.find())
23-
24-
# Then the function that deletes variants in dry run should run without error
25-
cmd_params = [
26-
"delete",
27-
"variants",
28-
"-u",
29-
user_obj["email"],
30-
"--status",
31-
"inactive",
32-
"--older-than",
33-
2,
34-
"--analysis-type",
35-
"wes",
36-
"--rank-threshold",
37-
RANK_THRESHOLD,
38-
"--variants-threshold",
39-
VARIANTS_THRESHOLD,
40-
"--keep-ctg",
41-
"str",
42-
"--dry-run",
43-
]
44-
result = runner.invoke(cli, cmd_params)
45-
assert result.exit_code == 0
46-
assert "estimated deleted variants" in result.output
47-
48-
# And no variants should be deleted
49-
assert sum(1 for _ in store.variant_collection.find()) == n_initial_vars
50-
51-
52-
def test_delete_variants(mock_app, case_obj, user_obj):
53-
"""Test deleting variants using the delete variants command line"""
54-
55-
# Given a case with SNV variants
56-
runner = mock_app.test_cli_runner()
57-
result = runner.invoke(
58-
cli, ["load", "variants", "--snv", "--rank-threshold", 0, case_obj["_id"]]
59-
)
60-
assert result.exit_code == 0
61-
nr_snvs = sum(1 for _ in store.variant_collection.find())
62-
63-
# AND WTS outliers
64-
result = runner.invoke(
65-
cli, ["load", "variants", "--outlier-research", case_obj["_id"], "--force"]
66-
)
67-
assert result.exit_code == 0
68-
nr_outliers = sum(1 for _ in store.omics_variant_collection.find())
69-
70-
n_initial_vars = nr_snvs + nr_outliers
71-
72-
# Then the function that deletes variants should run without error
73-
cmd_params = [
74-
"delete",
75-
"variants",
76-
"-u",
77-
user_obj["email"],
78-
"--status",
79-
"inactive",
80-
"--keep-ctg",
81-
"outlier",
82-
"--older-than",
83-
2,
84-
"--analysis-type",
85-
"wes",
86-
"--rank-threshold",
87-
RANK_THRESHOLD,
88-
"--variants-threshold",
89-
VARIANTS_THRESHOLD,
90-
]
91-
result = runner.invoke(cli, cmd_params, input="y")
92-
93-
assert result.exit_code == 0
94-
assert "estimated deleted variants" not in result.output
95-
96-
# variants should be deleted
97-
n_current_vars = sum(1 for _ in store.variant_collection.find())
98-
assert n_current_vars < n_initial_vars
99-
100-
# and a relative event should be created
101-
event = store.event_collection.find_one({"verb": "remove_variants"})
102-
assert event["case"] == case_obj["_id"]
103-
assert (
104-
event["content"]
105-
== f"Rank-score threshold:0, case n. variants threshold:{VARIANTS_THRESHOLD}."
106-
)
107-
# SNV variants should be gone
108-
assert sum(1 for _ in store.variant_collection.find()) == 0
109-
# WHILE outliers should still be available
110-
assert sum(1 for _ in store.omics_variant_collection.find()) == nr_outliers
111-
112-
113-
def test_delete_outlier_variants(mock_app, case_obj, user_obj):
114-
"""Test the delete variants command's ability to remove omics variants."""
115-
116-
# Given a case with (research) outlier variants
117-
runner = mock_app.test_cli_runner()
118-
result = runner.invoke(
119-
cli, ["load", "variants", "--outlier-research", case_obj["_id"], "--force"]
120-
)
121-
assert result.exit_code == 0
122-
n_initial_vars = sum(1 for _ in store.omics_variant_collection.find())
123-
assert n_initial_vars
124-
n_variants_to_delete = store.omics_variant_collection.count_documents({})
125-
assert n_variants_to_delete
126-
127-
# WHEN variants are removed using the command line
128-
cmd_params = [
129-
"delete",
130-
"variants",
131-
"-u",
132-
user_obj["email"],
133-
"--rank-threshold",
134-
0,
135-
"--rm-ctg",
136-
"outlier",
137-
]
138-
result = runner.invoke(cli, cmd_params, input="y")
139-
assert result.exit_code == 0
140-
assert "estimated deleted variants" not in result.output
141-
142-
# THEN the variants should be gone
143-
n_current_vars = sum(1 for _ in store.variant_collection.find())
144-
assert n_current_vars == 0
145-
assert n_current_vars + n_variants_to_delete == n_initial_vars
146-
# and a relative event should be created
147-
event = store.event_collection.find_one({"verb": "remove_variants"})
148-
assert event["case"] == case_obj["_id"]
149-
assert "Rank-score threshold:0" in event["content"]
150-
1516

1527
def test_delete_panel_non_existing(empty_mock_app, testpanel_obj):
1538
"Test the CLI command that deletes a gene panel"

0 commit comments

Comments
 (0)