4242EMG_CONFIG = settings .EMG_CONFIG
4343
4444
45- def generate_fake_rawreads_pipeline_results (results_dir , sample_accession ):
45+ def generate_fake_rawreads_pipeline_results (
46+ results_dir , sample_accession , make_functional = True
47+ ):
4648 """
4749 Generate fake raw-reads pipeline results for testing.
4850
4951 Based on the directory structure provided in the issue description.
5052
5153 :param results_dir: Directory to create the fake results in
5254 :param sample_accession: Sample accession to use in file names
55+ :param make_functional: Whether to generate functional analysis (pfam) results
5356 """
5457
5558 logger = logging .getLogger ("generate_dummy_data_debug" )
@@ -73,84 +76,85 @@ def generate_fake_rawreads_pipeline_results(results_dir, sample_accession):
7376 )
7477
7578 # Create function-summary directory and subdirectories
76- func_dir = f"{ results_dir } /{ sample_accession } /function-summary"
77- logger .info (f"Creating dummy functional results at { func_dir } " )
78- pfam_dir = f"{ func_dir } /pfam"
79- os .makedirs (pfam_dir , exist_ok = True )
80- with gzip .open (f"{ pfam_dir } /{ sample_accession } _pfam.txt.gz" , "wb" ) as f :
81- f .write (
82- dedent (
83- """\
84- # function read_count coverage_depth coverage_breadth
85- PF21175.2 1 0.9583333333333334 0.9583333333333334
86- PF10418.14 1 0.926829268292683 0.926829268292683
87- PF17802.7 1 0.7692307692307693 0.7692307692307693
88- PF17769.7 1 0.7142857142857143 0.7142857142857143
89- PF10531.15 1 0.6909090909090909 0.6909090909090909
90- PF22269.2 1 0.6612903225806451 0.6612903225806451
91- PF13411.12 1 0.6376811594202898 0.6376811594202898
92- PF00515.34 1 0.5882352941176471 0.5882352941176471
93- PF16320.10 1 0.5625 0.5625
94- PF13186.11 1 0.5522388059701493 0.5522388059701493
95- PF16124.10 1 0.5303030303030303 0.5303030303030303
96- PF13807.11 1 0.47560975609756095 0.47560975609756095
97- PF22811.2 1 0.4523809523809524 0.4523809523809524
98- PF01782.24 1 0.4523809523809524 0.4523809523809524
99- PF00009.33 2 0.4148936170212766 0.32978723404255317
100- PF00005.33 2 0.40145985401459855 0.40145985401459855
101- PF08428.16 1 0.39473684210526316 0.39473684210526316
102- PF00679.30 1 0.3707865168539326 0.3707865168539326
103- PF00448.28 2 0.3673469387755102 0.3673469387755102
104- PF06755.17 1 0.35714285714285715 0.35714285714285715
105- PF10800.13 1 0.34615384615384615 0.34615384615384615
106- PF02922.24 1 0.3373493975903614 0.3373493975903614
107- PF00472.26 1 0.33620689655172414 0.33620689655172414
108- PF23139.1 1 0.32894736842105265 0.32894736842105265
109- PF18818.7 1 0.30952380952380953 0.30952380952380953
110- PF21018.2 1 0.2727272727272727 0.2727272727272727
111- PF14284.11 1 0.272108843537415 0.272108843537415
112- PF13288.12 1 0.2608695652173913 0.2608695652173913
113- PF00308.24 1 0.24691358024691357 0.24691358024691357
114- PF12978.13 1 0.24528301886792453 0.24528301886792453
115- PF00724.26 2 0.2309941520467836 0.14912280701754385
116- PF19306.5 1 0.2261904761904762 0.2261904761904762
117- PF06924.17 1 0.22598870056497175 0.22598870056497175
118- PF02397.22 1 0.22346368715083798 0.22346368715083798
119- PF03816.19 1 0.20666666666666667 0.20666666666666667
120- PF00849.27 1 0.19736842105263158 0.19736842105263158
121- PF13614.12 1 0.1864406779661017 0.1864406779661017
122- PF09985.14 1 0.17105263157894737 0.17105263157894737
123- PF01435.24 1 0.16666666666666666 0.16666666666666666
124- PF03796.21 1 0.1568627450980392 0.1568627450980392
125- PF17657.6 1 0.1566265060240964 0.1566265060240964
126- PF00814.31 1 0.15151515151515152 0.15151515151515152
127- PF03613.19 1 0.1509433962264151 0.1509433962264151
128- PF04898.20 1 0.1444043321299639 0.1444043321299639
129- PF11997.14 1 0.1417910447761194 0.1417910447761194
130- PF02601.20 1 0.12698412698412698 0.12698412698412698
131- PF02896.24 1 0.12627986348122866 0.12627986348122866
132- PF00393.24 1 0.11724137931034483 0.11724137931034483
133- PF01702.25 1 0.10644257703081232 0.10644257703081232
134- PF01041.23 1 0.10277777777777777 0.10277777777777777
135- PF05649.18 1 0.10236220472440945 0.10236220472440945
136- PF06965.17 1 0.10160427807486631 0.10160427807486631
137- PF00478.31 1 0.10144927536231885 0.10144927536231885
138- PF00860.26 1 0.10025706940874037 0.10025706940874037
139- PF07971.18 1 0.09051724137931035 0.09051724137931035
140- PF12979.12 1 0.08882521489971347 0.08882521489971347
141- PF00330.25 1 0.08855291576673865 0.08855291576673865
142- PF01425.27 1 0.08764044943820225 0.08764044943820225
143- PF00171.27 1 0.08676789587852494 0.08676789587852494
144- PF02652.20 1 0.07279693486590039 0.07279693486590039
145- PF13597.11 1 0.060498220640569395 0.060498220640569395
146- PF02901.20 1 0.06027820710973725 0.06027820710973725
147- PF09586.16 1 0.045508982035928146 0.045508982035928146
148- """
149- ).encode ()
150- )
151- os .makedirs (f"{ pfam_dir } " , exist_ok = True )
152- with open (f"{ pfam_dir } /{ sample_accession } _pfam.stats.json" , "wt" ) as f :
153- f .write (r'{"reads_mapped": 67, "hmm_count": 63, "read_hit_count": 67}' )
79+ if make_functional :
80+ func_dir = f"{ results_dir } /{ sample_accession } /function-summary"
81+ logger .info (f"Creating dummy functional results at { func_dir } " )
82+ pfam_dir = f"{ func_dir } /pfam"
83+ os .makedirs (pfam_dir , exist_ok = True )
84+ with gzip .open (f"{ pfam_dir } /{ sample_accession } _pfam.txt.gz" , "wb" ) as f :
85+ f .write (
86+ dedent (
87+ """\
88+ # function read_count coverage_depth coverage_breadth
89+ PF21175.2 1 0.9583333333333334 0.9583333333333334
90+ PF10418.14 1 0.926829268292683 0.926829268292683
91+ PF17802.7 1 0.7692307692307693 0.7692307692307693
92+ PF17769.7 1 0.7142857142857143 0.7142857142857143
93+ PF10531.15 1 0.6909090909090909 0.6909090909090909
94+ PF22269.2 1 0.6612903225806451 0.6612903225806451
95+ PF13411.12 1 0.6376811594202898 0.6376811594202898
96+ PF00515.34 1 0.5882352941176471 0.5882352941176471
97+ PF16320.10 1 0.5625 0.5625
98+ PF13186.11 1 0.5522388059701493 0.5522388059701493
99+ PF16124.10 1 0.5303030303030303 0.5303030303030303
100+ PF13807.11 1 0.47560975609756095 0.47560975609756095
101+ PF22811.2 1 0.4523809523809524 0.4523809523809524
102+ PF01782.24 1 0.4523809523809524 0.4523809523809524
103+ PF00009.33 2 0.4148936170212766 0.32978723404255317
104+ PF00005.33 2 0.40145985401459855 0.40145985401459855
105+ PF08428.16 1 0.39473684210526316 0.39473684210526316
106+ PF00679.30 1 0.3707865168539326 0.3707865168539326
107+ PF00448.28 2 0.3673469387755102 0.3673469387755102
108+ PF06755.17 1 0.35714285714285715 0.35714285714285715
109+ PF10800.13 1 0.34615384615384615 0.34615384615384615
110+ PF02922.24 1 0.3373493975903614 0.3373493975903614
111+ PF00472.26 1 0.33620689655172414 0.33620689655172414
112+ PF23139.1 1 0.32894736842105265 0.32894736842105265
113+ PF18818.7 1 0.30952380952380953 0.30952380952380953
114+ PF21018.2 1 0.2727272727272727 0.2727272727272727
115+ PF14284.11 1 0.272108843537415 0.272108843537415
116+ PF13288.12 1 0.2608695652173913 0.2608695652173913
117+ PF00308.24 1 0.24691358024691357 0.24691358024691357
118+ PF12978.13 1 0.24528301886792453 0.24528301886792453
119+ PF00724.26 2 0.2309941520467836 0.14912280701754385
120+ PF19306.5 1 0.2261904761904762 0.2261904761904762
121+ PF06924.17 1 0.22598870056497175 0.22598870056497175
122+ PF02397.22 1 0.22346368715083798 0.22346368715083798
123+ PF03816.19 1 0.20666666666666667 0.20666666666666667
124+ PF00849.27 1 0.19736842105263158 0.19736842105263158
125+ PF13614.12 1 0.1864406779661017 0.1864406779661017
126+ PF09985.14 1 0.17105263157894737 0.17105263157894737
127+ PF01435.24 1 0.16666666666666666 0.16666666666666666
128+ PF03796.21 1 0.1568627450980392 0.1568627450980392
129+ PF17657.6 1 0.1566265060240964 0.1566265060240964
130+ PF00814.31 1 0.15151515151515152 0.15151515151515152
131+ PF03613.19 1 0.1509433962264151 0.1509433962264151
132+ PF04898.20 1 0.1444043321299639 0.1444043321299639
133+ PF11997.14 1 0.1417910447761194 0.1417910447761194
134+ PF02601.20 1 0.12698412698412698 0.12698412698412698
135+ PF02896.24 1 0.12627986348122866 0.12627986348122866
136+ PF00393.24 1 0.11724137931034483 0.11724137931034483
137+ PF01702.25 1 0.10644257703081232 0.10644257703081232
138+ PF01041.23 1 0.10277777777777777 0.10277777777777777
139+ PF05649.18 1 0.10236220472440945 0.10236220472440945
140+ PF06965.17 1 0.10160427807486631 0.10160427807486631
141+ PF00478.31 1 0.10144927536231885 0.10144927536231885
142+ PF00860.26 1 0.10025706940874037 0.10025706940874037
143+ PF07971.18 1 0.09051724137931035 0.09051724137931035
144+ PF12979.12 1 0.08882521489971347 0.08882521489971347
145+ PF00330.25 1 0.08855291576673865 0.08855291576673865
146+ PF01425.27 1 0.08764044943820225 0.08764044943820225
147+ PF00171.27 1 0.08676789587852494 0.08676789587852494
148+ PF02652.20 1 0.07279693486590039 0.07279693486590039
149+ PF13597.11 1 0.060498220640569395 0.060498220640569395
150+ PF02901.20 1 0.06027820710973725 0.06027820710973725
151+ PF09586.16 1 0.045508982035928146 0.045508982035928146
152+ """
153+ ).encode ()
154+ )
155+ os .makedirs (f"{ pfam_dir } " , exist_ok = True )
156+ with open (f"{ pfam_dir } /{ sample_accession } _pfam.stats.json" , "wt" ) as f :
157+ f .write (r'{"reads_mapped": 67, "hmm_count": 63, "read_hit_count": 67}' )
154158
155159 # Create taxonomy-summary directory and subdirectories
156160 tax_dir = f"{ results_dir } /{ sample_accession } /taxonomy-summary"
@@ -1405,3 +1409,181 @@ def suspend_side_effect(wait_for_input=None):
14051409 assert (
14061410 move_to_private_found
14071411 ), "No move operation found targeting private results directory"
1412+
1413+
@pytest.mark.httpx_mock(should_mock=should_not_mock_httpx_requests_to_prefect_server)
@pytest.mark.django_db(transaction=True)
@patch(
    "workflows.flows.analyse_study_tasks.raw_reads.run_rawreads_pipeline_via_samplesheet.queryset_hash"
)
@patch(
    "workflows.data_io_utils.mgnify_v6_utils.rawreads.FileIsNotEmptyRule",
    MockFileIsNotEmptyRule,
)
@patch(
    "workflows.flows.analyse_study_tasks.shared.copy_v6_pipeline_results.run_deployment"
)
@pytest.mark.parametrize(
    "mock_suspend_flow_run", ["workflows.flows.analysis_rawreads_study"], indirect=True
)
def test_prefect_analyse_rawreads_flow_no_functional(
    mock_run_deployment,
    mock_queryset_hash_for_rawreads,
    prefect_harness,
    httpx_mock,
    ena_any_sample_metadata,
    mock_cluster_can_accept_jobs_yes,
    mock_start_cluster_job,
    mock_check_cluster_job_all_completed,
    raw_read_ena_study,
    mock_suspend_flow_run,
    admin_user,
    top_level_biomes,
):
    """
    Test that the raw-reads flow completes when functional analysis is disabled.

    The test mocks the ENA portal responses for a two-run study, writes fake
    pipeline results *without* the function-summary (pfam) outputs, resumes the
    suspended flow with ``functional_analysis=False`` and then checks that:

    * the study is flagged as having v6 analyses,
    * every run has a completed analysis,
    * taxonomy annotations were imported, and
    * no functional annotations were imported.

    Patch-to-argument mapping: ``@patch`` decorators are applied bottom-up, so
    ``mock_run_deployment`` receives the ``run_deployment`` patch and
    ``mock_queryset_hash_for_rawreads`` receives the ``queryset_hash`` patch.
    The ``FileIsNotEmptyRule`` patch supplies its own replacement object and
    therefore injects no argument. All remaining parameters are pytest fixtures.
    """

    # NOTE(review): this mutates the shared EMG_CONFIG object and is not
    # restored afterwards - confirm that other tests do not depend on the
    # previous value.
    EMG_CONFIG.rawreads_pipeline.keep_study_summary_partials = True
    mock_run_deployment.return_value = Mock(id="mock-flow-run-id")

    # Pin the samplesheet hash so the results directory created below is the
    # one that is built from the same hash value.
    samplesheet_hash = "nofunc123"
    mock_queryset_hash_for_rawreads.return_value = samplesheet_hash

    study_accession = "ERP136385"
    all_results = ["ERR10889230", "ERR10889231"]

    # mock ENA responses
    httpx_mock.add_response(
        url=f"{EMG_CONFIG.ena.portal_search_api}?"
        f"result=study"
        f"&query=%22%28study_accession%3D{study_accession}+OR+secondary_study_accession%3D{study_accession}%29%22"
        f"&fields=study_title%2Cstudy_description%2Ccenter_name%2Csecondary_study_accession%2Cstudy_name"
        f"&limit=10"
        f"&format=json"
        f"&dataPortal=metagenome",
        json=[
            {
                "study_accession": study_accession,
                "secondary_study_accession": study_accession,
                "study_title": "No-functional test study",
            },
        ],
        is_reusable=True,
        is_optional=True,
    )
    httpx_mock.add_response(
        url=f"{EMG_CONFIG.ena.portal_search_api}?"
        f"result=study"
        f"&query=%22%28study_accession%3D{study_accession}+OR+secondary_study_accession%3D{study_accession}%29%22"
        f"&fields=study_accession"
        f"&limit="
        f"&format=json"
        f"&dataPortal=metagenome",
        json=[{"study_accession": study_accession}],
        is_reusable=True,
        is_optional=True,
    )
    httpx_mock.add_response(
        url=f"{EMG_CONFIG.ena.portal_search_api}?"
        f"result=read_run"
        f"&query=%22%28%28study_accession={study_accession}+OR+secondary_study_accession={study_accession}%29%20AND%20library_strategy=WGS%29%22"
        f"&limit=10000"
        f"&format=json"
        f"&fields=run_accession%2Csample_accession%2Csample_title%2Csecondary_sample_accession%2Cfastq_md5%2Cfastq_ftp%2Clibrary_layout%2Clibrary_strategy%2Clibrary_source%2Cscientific_name%2Chost_tax_id%2Chost_scientific_name%2Cinstrument_platform%2Cinstrument_model%2Clocation%2Clat%2Clon"
        f"&dataPortal=metagenome",
        json=[
            {
                "run_accession": run_acc,
                "sample_accession": f"SAMEA11243{i}",
                "sample_title": "stool",
                "secondary_sample_accession": f"ERS1454{i}",
                "fastq_md5": "aaa;bbb;ccc",
                "fastq_ftp": f"ftp.sra.ebi.ac.uk/vol1/fastq/{run_acc}/{run_acc}.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/{run_acc}/{run_acc}_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/{run_acc}/{run_acc}_2.fastq.gz",
                "library_layout": "PAIRED",
                "library_strategy": "WGS",
                "library_source": "METAGENOMIC",
                "scientific_name": "human gut metagenome",
                "host_tax_id": "",
                "host_scientific_name": "",
                "instrument_platform": "ILLUMINA",
                "instrument_model": "Illumina HiSeq 2500",
                "location": "",
                "lat": "",
                "lon": "",
            }
            for i, run_acc in enumerate(all_results)
        ],
        is_reusable=True,
        is_optional=True,
    )

    # Create fake results WITHOUT functional analysis
    rawreads_folder = (
        Path(EMG_CONFIG.slurm.default_workdir)
        / Path(study_accession)
        / Path(
            f"{EMG_CONFIG.rawreads_pipeline.pipeline_name}_{EMG_CONFIG.rawreads_pipeline.pipeline_version}"
        )
        / Path(samplesheet_hash)
    )
    rawreads_folder.mkdir(exist_ok=True, parents=True)

    # One "<run>,all_results" row per run in the completed-runs CSV.
    with open(
        f"{rawreads_folder}/{EMG_CONFIG.rawreads_pipeline.completed_runs_csv}",
        "w",
    ) as file:
        for r in all_results:
            file.write(f"{r},all_results\n")

    for r in all_results:
        generate_fake_rawreads_pipeline_results(
            rawreads_folder, r, make_functional=False
        )

    # Pretend that a human resumed the flow with functional_analysis=False
    BiomeChoices = Enum("BiomeChoices", {"root.engineered": "Root:Engineered"})
    UserChoices = get_users_as_choices()

    class AnalyseStudyInput(BaseModel):
        biome: BiomeChoices
        watchers: List[UserChoices]
        library_strategy_policy: Optional[ENALibraryStrategyPolicy]
        functional_analysis: bool
        webin_owner: Optional[str]

    def suspend_side_effect(wait_for_input=None):
        # Only the study-input prompt is answered; any other input type falls
        # through and yields None.
        if wait_for_input.__name__ == "AnalyseStudyInput":
            return AnalyseStudyInput(
                biome=BiomeChoices["root.engineered"],
                watchers=[UserChoices[admin_user.username]],
                library_strategy_policy=ENALibraryStrategyPolicy.ONLY_IF_CORRECT_IN_ENA,
                functional_analysis=False,
                webin_owner=None,
            )

    mock_suspend_flow_run.side_effect = suspend_side_effect

    # RUN MAIN FLOW
    analysis_rawreads_study(study_accession=study_accession)

    mock_start_cluster_job.assert_called()
    mock_check_cluster_job_all_completed.assert_called()

    study = analyses.models.Study.objects.get_or_create_for_ena_study(study_accession)
    study.refresh_from_db()
    assert study.features.has_v6_analyses

    # All analyses should complete and be imported (one per mocked run)
    assert study.analyses.filter(status__analysis_completed=True).count() == len(
        all_results
    )

    # Taxonomic annotations should still be present
    analysis_obj: analyses.models.Analysis = (
        analyses.models.Analysis.objects_and_annotations.get(
            run__ena_accessions__contains=[all_results[0]]
        )
    )
    assert analyses.models.Analysis.TAXONOMIES in analysis_obj.annotations

    # Functional annotations should NOT be present
    assert (
        analyses.models.Analysis.FUNCTIONAL_ANNOTATION not in analysis_obj.annotations
    )
0 commit comments