|
7 | 7 | from malariagen_data.anoph.dipclust import AnophelesDipClustAnalysis
|
8 | 8 |
|
9 | 9 |
|
def random_transcripts_contig(*, api, contig, n):
    """Pick ``n`` distinct random mRNA transcript IDs located on ``contig``.

    Parameters
    ----------
    api
        Object exposing ``genome_features(attributes=...)``; the returned
        DataFrame is read through its ``type``, ``contig`` and ``ID`` columns.
    contig
        Name of the contig to draw transcripts from.
    n
        Number of transcript IDs to sample without replacement.

    Returns
    -------
    list
        ``n`` transcript IDs, in random order.

    Raises
    ------
    ValueError
        Raised by ``random.sample`` when ``n`` is negative or exceeds the
        number of mRNA features with a non-missing ID on ``contig``.
    """
    df_gff = api.genome_features(attributes=["ID", "Parent"])
    # Filter with a boolean mask instead of interpolating ``contig`` into a
    # query string, so a name containing quote characters cannot break (or
    # alter) the filter expression.
    is_transcript = (df_gff["type"] == "mRNA") & (df_gff["contig"] == contig)
    transcript_ids = df_gff.loc[is_transcript, "ID"].dropna().to_list()
    return random.sample(transcript_ids, n)
| 16 | + |
| 17 | + |
10 | 18 | @pytest.fixture
|
11 | 19 | def ag3_sim_api(ag3_sim_fixture):
|
12 | 20 | return AnophelesDipClustAnalysis(
|
@@ -98,3 +106,97 @@ def test_plot_diplotype_clustering(
|
98 | 106 |
|
99 | 107 | # Run checks.
|
100 | 108 | api.plot_diplotype_clustering(**dipclust_params)
|
| 109 | + |
| 110 | + |
@pytest.mark.parametrize("distance_metric", ["cityblock", "euclidean"])
@parametrize_with_cases("fixture,api", cases=".")
def test_plot_diplotype_clustering_advanced(
    fixture, api: AnophelesDipClustAnalysis, distance_metric
):
    """Smoke-test the advanced plot for each parametrized distance metric.

    The remaining arguments are drawn at random; the test makes no
    assertions and passes when the call completes without raising.
    """
    # Candidate values to draw from.
    candidate_sample_sets = api.sample_sets()["sample_set"].to_list()
    candidate_linkages = (
        "single",
        "complete",
        "average",
        "weighted",
        "centroid",
        "median",
        "ward",
    )
    candidate_queries = (None, "sex_call == 'F'")

    # Draw the random arguments (region, sample set, linkage, query).
    chosen_region = fixture.random_region_str(region_size=5000)
    chosen_sample_set = random.choice(candidate_sample_sets)
    chosen_linkage = random.choice(candidate_linkages)
    chosen_query = random.choice(candidate_queries)

    # Run the plot without displaying it.
    api.plot_diplotype_clustering_advanced(
        region=chosen_region,
        sample_sets=[chosen_sample_set],
        linkage_method=chosen_linkage,
        distance_metric=distance_metric,
        sample_query=chosen_query,
        show=False,
    )
| 139 | + |
| 140 | + |
@pytest.mark.parametrize("n", [1, 2])
@parametrize_with_cases("fixture,api", cases=".")
def test_plot_diplotype_clustering_advanced_with_transcript(
    fixture, api: AnophelesDipClustAnalysis, n
):
    """Smoke-test the advanced plot when ``snp_transcripts`` is supplied.

    ``n`` transcripts are sampled from a random contig, and that whole contig
    is passed as the region. The test makes no assertions and passes when
    the call completes without raising.
    """
    # Choose the contig first, then transcripts located on it.
    chosen_contig = fixture.random_contig()
    chosen_transcripts = random_transcripts_contig(
        api=api, contig=chosen_contig, n=n
    )

    # Candidate values to draw from.
    candidate_sample_sets = api.sample_sets()["sample_set"].to_list()
    candidate_linkages = (
        "single",
        "complete",
        "average",
        "weighted",
        "centroid",
        "median",
        "ward",
    )
    candidate_queries = (None, "sex_call == 'F'")

    # Draw the random arguments (sample set, linkage, query).
    chosen_sample_set = random.choice(candidate_sample_sets)
    chosen_linkage = random.choice(candidate_linkages)
    chosen_query = random.choice(candidate_queries)

    # Run the plot without displaying it.
    api.plot_diplotype_clustering_advanced(
        region=chosen_contig,
        snp_transcripts=chosen_transcripts,
        sample_sets=[chosen_sample_set],
        linkage_method=chosen_linkage,
        distance_metric="cityblock",
        sample_query=chosen_query,
        show=False,
    )
| 172 | + |
| 173 | + |
@parametrize_with_cases("fixture,api", cases=".")
def test_plot_diplotype_clustering_advanced_with_cnv_region(
    fixture, api: AnophelesDipClustAnalysis
):
    """Smoke-test the advanced plot when ``cnv_region`` is supplied.

    The same random 5000-size region string is used for both ``region`` and
    ``cnv_region``. The test makes no assertions and passes when the call
    completes without raising.
    """
    # One random region serves as both the clustering and the CNV region.
    shared_region = fixture.random_region_str(region_size=5000)

    # Candidate values to draw from.
    candidate_sample_sets = api.sample_sets()["sample_set"].to_list()
    candidate_linkages = (
        "single",
        "complete",
        "average",
        "weighted",
        "centroid",
        "median",
        "ward",
    )
    candidate_queries = (None, "sex_call == 'F'")

    # Draw the random arguments (sample set, linkage, query).
    chosen_sample_set = random.choice(candidate_sample_sets)
    chosen_linkage = random.choice(candidate_linkages)
    chosen_query = random.choice(candidate_queries)

    # Run the plot without displaying it.
    api.plot_diplotype_clustering_advanced(
        region=shared_region,
        cnv_region=shared_region,
        sample_sets=[chosen_sample_set],
        linkage_method=chosen_linkage,
        distance_metric="cityblock",
        sample_query=chosen_query,
        show=False,
    )
0 commit comments