Skip to content

Commit a6d1df6

Browse files
Added extra test for raw-reads pipeline flow skipping functional analysis.
1 parent 8d15876 commit a6d1df6

File tree

2 files changed

+262
-80
lines changed

2 files changed

+262
-80
lines changed

.talismanrc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ fileignoreconfig:
172172
checksum: ac644b851d31913ca9328d5a83516146c4c76874201df0a2d7086df6b60435c8
173173

174174
- filename: workflows/tests/test_analysis_rawreads_study_flow.py
175-
checksum: 0f7255bdd7303269337a7d9be17d5246bfc32980697ba4135f54be201d13180a
175+
checksum: 211d4cfcd3e1b0dbe6e8b92683f6160661c50a17429d470ea0651daa41f5c3f8
176176

177177
- filename: workflows/flows/upload_assembly.py
178178
allowed_patterns: [key]

workflows/tests/test_analysis_rawreads_study_flow.py

Lines changed: 261 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -42,14 +42,17 @@
4242
EMG_CONFIG = settings.EMG_CONFIG
4343

4444

45-
def generate_fake_rawreads_pipeline_results(results_dir, sample_accession):
45+
def generate_fake_rawreads_pipeline_results(
46+
results_dir, sample_accession, make_functional=True
47+
):
4648
"""
4749
Generate fake raw-reads pipeline results for testing.
4850
4951
Based on the directory structure provided in the issue description.
5052
5153
:param results_dir: Directory to create the fake results in
5254
:param sample_accession: Sample accession to use in file names
55+
:param make_functional: Whether to generate functional analysis (pfam) results
5356
"""
5457

5558
logger = logging.getLogger("generate_dummy_data_debug")
@@ -73,84 +76,85 @@ def generate_fake_rawreads_pipeline_results(results_dir, sample_accession):
7376
)
7477

7578
# Create function-summary directory and subdirectories
76-
func_dir = f"{results_dir}/{sample_accession}/function-summary"
77-
logger.info(f"Creating dummy functional results at {func_dir}")
78-
pfam_dir = f"{func_dir}/pfam"
79-
os.makedirs(pfam_dir, exist_ok=True)
80-
with gzip.open(f"{pfam_dir}/{sample_accession}_pfam.txt.gz", "wb") as f:
81-
f.write(
82-
dedent(
83-
"""\
84-
# function read_count coverage_depth coverage_breadth
85-
PF21175.2 1 0.9583333333333334 0.9583333333333334
86-
PF10418.14 1 0.926829268292683 0.926829268292683
87-
PF17802.7 1 0.7692307692307693 0.7692307692307693
88-
PF17769.7 1 0.7142857142857143 0.7142857142857143
89-
PF10531.15 1 0.6909090909090909 0.6909090909090909
90-
PF22269.2 1 0.6612903225806451 0.6612903225806451
91-
PF13411.12 1 0.6376811594202898 0.6376811594202898
92-
PF00515.34 1 0.5882352941176471 0.5882352941176471
93-
PF16320.10 1 0.5625 0.5625
94-
PF13186.11 1 0.5522388059701493 0.5522388059701493
95-
PF16124.10 1 0.5303030303030303 0.5303030303030303
96-
PF13807.11 1 0.47560975609756095 0.47560975609756095
97-
PF22811.2 1 0.4523809523809524 0.4523809523809524
98-
PF01782.24 1 0.4523809523809524 0.4523809523809524
99-
PF00009.33 2 0.4148936170212766 0.32978723404255317
100-
PF00005.33 2 0.40145985401459855 0.40145985401459855
101-
PF08428.16 1 0.39473684210526316 0.39473684210526316
102-
PF00679.30 1 0.3707865168539326 0.3707865168539326
103-
PF00448.28 2 0.3673469387755102 0.3673469387755102
104-
PF06755.17 1 0.35714285714285715 0.35714285714285715
105-
PF10800.13 1 0.34615384615384615 0.34615384615384615
106-
PF02922.24 1 0.3373493975903614 0.3373493975903614
107-
PF00472.26 1 0.33620689655172414 0.33620689655172414
108-
PF23139.1 1 0.32894736842105265 0.32894736842105265
109-
PF18818.7 1 0.30952380952380953 0.30952380952380953
110-
PF21018.2 1 0.2727272727272727 0.2727272727272727
111-
PF14284.11 1 0.272108843537415 0.272108843537415
112-
PF13288.12 1 0.2608695652173913 0.2608695652173913
113-
PF00308.24 1 0.24691358024691357 0.24691358024691357
114-
PF12978.13 1 0.24528301886792453 0.24528301886792453
115-
PF00724.26 2 0.2309941520467836 0.14912280701754385
116-
PF19306.5 1 0.2261904761904762 0.2261904761904762
117-
PF06924.17 1 0.22598870056497175 0.22598870056497175
118-
PF02397.22 1 0.22346368715083798 0.22346368715083798
119-
PF03816.19 1 0.20666666666666667 0.20666666666666667
120-
PF00849.27 1 0.19736842105263158 0.19736842105263158
121-
PF13614.12 1 0.1864406779661017 0.1864406779661017
122-
PF09985.14 1 0.17105263157894737 0.17105263157894737
123-
PF01435.24 1 0.16666666666666666 0.16666666666666666
124-
PF03796.21 1 0.1568627450980392 0.1568627450980392
125-
PF17657.6 1 0.1566265060240964 0.1566265060240964
126-
PF00814.31 1 0.15151515151515152 0.15151515151515152
127-
PF03613.19 1 0.1509433962264151 0.1509433962264151
128-
PF04898.20 1 0.1444043321299639 0.1444043321299639
129-
PF11997.14 1 0.1417910447761194 0.1417910447761194
130-
PF02601.20 1 0.12698412698412698 0.12698412698412698
131-
PF02896.24 1 0.12627986348122866 0.12627986348122866
132-
PF00393.24 1 0.11724137931034483 0.11724137931034483
133-
PF01702.25 1 0.10644257703081232 0.10644257703081232
134-
PF01041.23 1 0.10277777777777777 0.10277777777777777
135-
PF05649.18 1 0.10236220472440945 0.10236220472440945
136-
PF06965.17 1 0.10160427807486631 0.10160427807486631
137-
PF00478.31 1 0.10144927536231885 0.10144927536231885
138-
PF00860.26 1 0.10025706940874037 0.10025706940874037
139-
PF07971.18 1 0.09051724137931035 0.09051724137931035
140-
PF12979.12 1 0.08882521489971347 0.08882521489971347
141-
PF00330.25 1 0.08855291576673865 0.08855291576673865
142-
PF01425.27 1 0.08764044943820225 0.08764044943820225
143-
PF00171.27 1 0.08676789587852494 0.08676789587852494
144-
PF02652.20 1 0.07279693486590039 0.07279693486590039
145-
PF13597.11 1 0.060498220640569395 0.060498220640569395
146-
PF02901.20 1 0.06027820710973725 0.06027820710973725
147-
PF09586.16 1 0.045508982035928146 0.045508982035928146
148-
"""
149-
).encode()
150-
)
151-
os.makedirs(f"{pfam_dir}", exist_ok=True)
152-
with open(f"{pfam_dir}/{sample_accession}_pfam.stats.json", "wt") as f:
153-
f.write(r'{"reads_mapped": 67, "hmm_count": 63, "read_hit_count": 67}')
79+
if make_functional:
80+
func_dir = f"{results_dir}/{sample_accession}/function-summary"
81+
logger.info(f"Creating dummy functional results at {func_dir}")
82+
pfam_dir = f"{func_dir}/pfam"
83+
os.makedirs(pfam_dir, exist_ok=True)
84+
with gzip.open(f"{pfam_dir}/{sample_accession}_pfam.txt.gz", "wb") as f:
85+
f.write(
86+
dedent(
87+
"""\
88+
# function read_count coverage_depth coverage_breadth
89+
PF21175.2 1 0.9583333333333334 0.9583333333333334
90+
PF10418.14 1 0.926829268292683 0.926829268292683
91+
PF17802.7 1 0.7692307692307693 0.7692307692307693
92+
PF17769.7 1 0.7142857142857143 0.7142857142857143
93+
PF10531.15 1 0.6909090909090909 0.6909090909090909
94+
PF22269.2 1 0.6612903225806451 0.6612903225806451
95+
PF13411.12 1 0.6376811594202898 0.6376811594202898
96+
PF00515.34 1 0.5882352941176471 0.5882352941176471
97+
PF16320.10 1 0.5625 0.5625
98+
PF13186.11 1 0.5522388059701493 0.5522388059701493
99+
PF16124.10 1 0.5303030303030303 0.5303030303030303
100+
PF13807.11 1 0.47560975609756095 0.47560975609756095
101+
PF22811.2 1 0.4523809523809524 0.4523809523809524
102+
PF01782.24 1 0.4523809523809524 0.4523809523809524
103+
PF00009.33 2 0.4148936170212766 0.32978723404255317
104+
PF00005.33 2 0.40145985401459855 0.40145985401459855
105+
PF08428.16 1 0.39473684210526316 0.39473684210526316
106+
PF00679.30 1 0.3707865168539326 0.3707865168539326
107+
PF00448.28 2 0.3673469387755102 0.3673469387755102
108+
PF06755.17 1 0.35714285714285715 0.35714285714285715
109+
PF10800.13 1 0.34615384615384615 0.34615384615384615
110+
PF02922.24 1 0.3373493975903614 0.3373493975903614
111+
PF00472.26 1 0.33620689655172414 0.33620689655172414
112+
PF23139.1 1 0.32894736842105265 0.32894736842105265
113+
PF18818.7 1 0.30952380952380953 0.30952380952380953
114+
PF21018.2 1 0.2727272727272727 0.2727272727272727
115+
PF14284.11 1 0.272108843537415 0.272108843537415
116+
PF13288.12 1 0.2608695652173913 0.2608695652173913
117+
PF00308.24 1 0.24691358024691357 0.24691358024691357
118+
PF12978.13 1 0.24528301886792453 0.24528301886792453
119+
PF00724.26 2 0.2309941520467836 0.14912280701754385
120+
PF19306.5 1 0.2261904761904762 0.2261904761904762
121+
PF06924.17 1 0.22598870056497175 0.22598870056497175
122+
PF02397.22 1 0.22346368715083798 0.22346368715083798
123+
PF03816.19 1 0.20666666666666667 0.20666666666666667
124+
PF00849.27 1 0.19736842105263158 0.19736842105263158
125+
PF13614.12 1 0.1864406779661017 0.1864406779661017
126+
PF09985.14 1 0.17105263157894737 0.17105263157894737
127+
PF01435.24 1 0.16666666666666666 0.16666666666666666
128+
PF03796.21 1 0.1568627450980392 0.1568627450980392
129+
PF17657.6 1 0.1566265060240964 0.1566265060240964
130+
PF00814.31 1 0.15151515151515152 0.15151515151515152
131+
PF03613.19 1 0.1509433962264151 0.1509433962264151
132+
PF04898.20 1 0.1444043321299639 0.1444043321299639
133+
PF11997.14 1 0.1417910447761194 0.1417910447761194
134+
PF02601.20 1 0.12698412698412698 0.12698412698412698
135+
PF02896.24 1 0.12627986348122866 0.12627986348122866
136+
PF00393.24 1 0.11724137931034483 0.11724137931034483
137+
PF01702.25 1 0.10644257703081232 0.10644257703081232
138+
PF01041.23 1 0.10277777777777777 0.10277777777777777
139+
PF05649.18 1 0.10236220472440945 0.10236220472440945
140+
PF06965.17 1 0.10160427807486631 0.10160427807486631
141+
PF00478.31 1 0.10144927536231885 0.10144927536231885
142+
PF00860.26 1 0.10025706940874037 0.10025706940874037
143+
PF07971.18 1 0.09051724137931035 0.09051724137931035
144+
PF12979.12 1 0.08882521489971347 0.08882521489971347
145+
PF00330.25 1 0.08855291576673865 0.08855291576673865
146+
PF01425.27 1 0.08764044943820225 0.08764044943820225
147+
PF00171.27 1 0.08676789587852494 0.08676789587852494
148+
PF02652.20 1 0.07279693486590039 0.07279693486590039
149+
PF13597.11 1 0.060498220640569395 0.060498220640569395
150+
PF02901.20 1 0.06027820710973725 0.06027820710973725
151+
PF09586.16 1 0.045508982035928146 0.045508982035928146
152+
"""
153+
).encode()
154+
)
155+
os.makedirs(f"{pfam_dir}", exist_ok=True)
156+
with open(f"{pfam_dir}/{sample_accession}_pfam.stats.json", "wt") as f:
157+
f.write(r'{"reads_mapped": 67, "hmm_count": 63, "read_hit_count": 67}')
154158

155159
# Create taxonomy-summary directory and subdirectories
156160
tax_dir = f"{results_dir}/{sample_accession}/taxonomy-summary"
@@ -1405,3 +1409,181 @@ def suspend_side_effect(wait_for_input=None):
14051409
assert (
14061410
move_to_private_found
14071411
), "No move operation found targeting private results directory"
1412+
1413+
1414+
def _register_ena_mocks_for_no_functional_test(
    httpx_mock, study_accession, run_accessions
):
    """Register the (reusable, optional) ENA portal responses used by the no-functional test."""
    # Study metadata lookup
    httpx_mock.add_response(
        url=f"{EMG_CONFIG.ena.portal_search_api}?"
        f"result=study"
        f"&query=%22%28study_accession%3D{study_accession}+OR+secondary_study_accession%3D{study_accession}%29%22"
        f"&fields=study_title%2Cstudy_description%2Ccenter_name%2Csecondary_study_accession%2Cstudy_name"
        f"&limit=10"
        f"&format=json"
        f"&dataPortal=metagenome",
        json=[
            {
                "study_accession": study_accession,
                "secondary_study_accession": study_accession,
                "study_title": "No-functional test study",
            },
        ],
        is_reusable=True,
        is_optional=True,
    )
    # Study accession-only lookup
    httpx_mock.add_response(
        url=f"{EMG_CONFIG.ena.portal_search_api}?"
        f"result=study"
        f"&query=%22%28study_accession%3D{study_accession}+OR+secondary_study_accession%3D{study_accession}%29%22"
        f"&fields=study_accession"
        f"&limit="
        f"&format=json"
        f"&dataPortal=metagenome",
        json=[{"study_accession": study_accession}],
        is_reusable=True,
        is_optional=True,
    )
    # WGS read-run listing: one entry per run accession
    httpx_mock.add_response(
        url=f"{EMG_CONFIG.ena.portal_search_api}?"
        f"result=read_run"
        f"&query=%22%28%28study_accession={study_accession}+OR+secondary_study_accession={study_accession}%29%20AND%20library_strategy=WGS%29%22"
        f"&limit=10000"
        f"&format=json"
        f"&fields=run_accession%2Csample_accession%2Csample_title%2Csecondary_sample_accession%2Cfastq_md5%2Cfastq_ftp%2Clibrary_layout%2Clibrary_strategy%2Clibrary_source%2Cscientific_name%2Chost_tax_id%2Chost_scientific_name%2Cinstrument_platform%2Cinstrument_model%2Clocation%2Clat%2Clon"
        f"&dataPortal=metagenome",
        json=[
            {
                "run_accession": run_acc,
                "sample_accession": f"SAMEA11243{i}",
                "sample_title": "stool",
                "secondary_sample_accession": f"ERS1454{i}",
                "fastq_md5": "aaa;bbb;ccc",
                "fastq_ftp": f"ftp.sra.ebi.ac.uk/vol1/fastq/{run_acc}/{run_acc}.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/{run_acc}/{run_acc}_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/{run_acc}/{run_acc}_2.fastq.gz",
                "library_layout": "PAIRED",
                "library_strategy": "WGS",
                "library_source": "METAGENOMIC",
                "scientific_name": "human gut metagenome",
                "host_tax_id": "",
                "host_scientific_name": "",
                "instrument_platform": "ILLUMINA",
                "instrument_model": "Illumina HiSeq 2500",
                "location": "",
                "lat": "",
                "lon": "",
            }
            for i, run_acc in enumerate(run_accessions)
        ],
        is_reusable=True,
        is_optional=True,
    )


def _write_fake_rawreads_results_without_functional(
    study_accession, samplesheet_hash, run_accessions
):
    """Create the pipeline work dir, completed-runs CSV and fake per-run results (no pfam)."""
    rawreads_folder = (
        Path(EMG_CONFIG.slurm.default_workdir)
        / Path(study_accession)
        / Path(
            f"{EMG_CONFIG.rawreads_pipeline.pipeline_name}_{EMG_CONFIG.rawreads_pipeline.pipeline_version}"
        )
        / Path(samplesheet_hash)
    )
    rawreads_folder.mkdir(exist_ok=True, parents=True)

    with open(
        f"{rawreads_folder}/{EMG_CONFIG.rawreads_pipeline.completed_runs_csv}",
        "w",
    ) as file:
        for r in run_accessions:
            file.write(f"{r},all_results\n")

    for r in run_accessions:
        generate_fake_rawreads_pipeline_results(
            rawreads_folder, r, make_functional=False
        )
    return rawreads_folder


@pytest.mark.httpx_mock(should_mock=should_not_mock_httpx_requests_to_prefect_server)
@pytest.mark.django_db(transaction=True)
@patch(
    "workflows.flows.analyse_study_tasks.raw_reads.run_rawreads_pipeline_via_samplesheet.queryset_hash"
)
@patch(
    "workflows.data_io_utils.mgnify_v6_utils.rawreads.FileIsNotEmptyRule",
    MockFileIsNotEmptyRule,
)
@patch(
    "workflows.flows.analyse_study_tasks.shared.copy_v6_pipeline_results.run_deployment"
)
@pytest.mark.parametrize(
    "mock_suspend_flow_run", ["workflows.flows.analysis_rawreads_study"], indirect=True
)
def test_prefect_analyse_rawreads_flow_no_functional(
    mock_run_deployment,
    mock_queryset_hash_for_rawreads,
    prefect_harness,
    httpx_mock,
    ena_any_sample_metadata,
    mock_cluster_can_accept_jobs_yes,
    mock_start_cluster_job,
    mock_check_cluster_job_all_completed,
    raw_read_ena_study,
    mock_suspend_flow_run,
    admin_user,
    top_level_biomes,
):
    """
    Test that the raw-reads flow completes when functional analysis is disabled.

    The test mocks the ENA portal responses, writes fake pipeline results that
    contain no functional (pfam) output, resumes the suspended flow with
    ``functional_analysis=False`` and then checks that both analyses complete
    with taxonomic, but not functional, annotations.
    """

    # EMG_CONFIG is a module-level object shared by every test in the session:
    # remember the current value so the override below cannot leak into other tests.
    keep_partials_before = EMG_CONFIG.rawreads_pipeline.keep_study_summary_partials
    EMG_CONFIG.rawreads_pipeline.keep_study_summary_partials = True
    try:
        mock_run_deployment.return_value = Mock(id="mock-flow-run-id")

        samplesheet_hash = "nofunc123"
        mock_queryset_hash_for_rawreads.return_value = samplesheet_hash

        study_accession = "ERP136385"
        all_results = ["ERR10889230", "ERR10889231"]

        # mock ENA responses
        _register_ena_mocks_for_no_functional_test(
            httpx_mock, study_accession, all_results
        )

        # Create fake results WITHOUT functional analysis
        _write_fake_rawreads_results_without_functional(
            study_accession, samplesheet_hash, all_results
        )

        # Pretend that a human resumed the flow with functional_analysis=False
        BiomeChoices = Enum("BiomeChoices", {"root.engineered": "Root:Engineered"})
        UserChoices = get_users_as_choices()

        class AnalyseStudyInput(BaseModel):
            biome: BiomeChoices
            watchers: List[UserChoices]
            library_strategy_policy: Optional[ENALibraryStrategyPolicy]
            functional_analysis: bool
            webin_owner: Optional[str]

        def suspend_side_effect(wait_for_input=None):
            # Only the study-input form is answered; any other input model gets None.
            if wait_for_input.__name__ == "AnalyseStudyInput":
                return AnalyseStudyInput(
                    biome=BiomeChoices["root.engineered"],
                    watchers=[UserChoices[admin_user.username]],
                    library_strategy_policy=ENALibraryStrategyPolicy.ONLY_IF_CORRECT_IN_ENA,
                    functional_analysis=False,
                    webin_owner=None,
                )

        mock_suspend_flow_run.side_effect = suspend_side_effect

        # RUN MAIN FLOW
        analysis_rawreads_study(study_accession=study_accession)

        mock_start_cluster_job.assert_called()
        mock_check_cluster_job_all_completed.assert_called()

        study = analyses.models.Study.objects.get_or_create_for_ena_study(
            study_accession
        )
        study.refresh_from_db()
        assert study.features.has_v6_analyses

        # All analyses should complete and be imported
        assert study.analyses.filter(status__analysis_completed=True).count() == 2

        # Taxonomic annotations should still be present
        analysis_obj: analyses.models.Analysis = (
            analyses.models.Analysis.objects_and_annotations.get(
                run__ena_accessions__contains=[all_results[0]]
            )
        )
        assert analyses.models.Analysis.TAXONOMIES in analysis_obj.annotations

        # Functional annotations should NOT be present
        assert (
            analyses.models.Analysis.FUNCTIONAL_ANNOTATION
            not in analysis_obj.annotations
        )
    finally:
        # Restore the shared config even if the flow or an assertion fails.
        EMG_CONFIG.rawreads_pipeline.keep_study_summary_partials = (
            keep_partials_before
        )

0 commit comments

Comments
 (0)