Skip to content

Commit 6b9767d

Browse files
committed
More tests and some trimming of the hierarchy in anoph
1 parent 75b4cc3 commit 6b9767d

File tree

3 files changed

+106
-11
lines changed

3 files changed

+106
-11
lines changed

malariagen_data/anoph/dipclust.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,16 @@
2323
cnv_params,
2424
)
2525
from .snp_frq import AnophelesSnpFrequencyAnalysis
26-
from .cnv_data import AnophelesCnvData
26+
from .cnv_frq import AnophelesCnvFrequencyAnalysis
2727

2828
AA_CHANGE_QUERY = (
2929
"effect in ['NON_SYNONYMOUS_CODING', 'START_LOST', 'STOP_LOST', 'STOP_GAINED']"
3030
)
3131

3232

33-
class AnophelesDipClustAnalysis(AnophelesSnpFrequencyAnalysis, AnophelesCnvData):
33+
class AnophelesDipClustAnalysis(
34+
AnophelesCnvFrequencyAnalysis, AnophelesSnpFrequencyAnalysis
35+
):
3436
def __init__(
3537
self,
3638
**kwargs,

malariagen_data/anopheles.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,6 @@
1212
import plotly.graph_objects as go # type: ignore
1313
from numpydoc_decorator import doc # type: ignore
1414

15-
from malariagen_data.anoph.snp_frq import (
16-
AnophelesSnpFrequencyAnalysis,
17-
)
18-
19-
from .anoph.cnv_frq import AnophelesCnvFrequencyAnalysis
2015

2116
from .anoph import (
2217
aim_params,
@@ -31,7 +26,6 @@
3126
)
3227
from .anoph.aim_data import AnophelesAimData
3328
from .anoph.base import AnophelesBase
34-
from .anoph.cnv_data import AnophelesCnvData
3529
from .anoph.genome_features import AnophelesGenomeFeaturesData
3630
from .anoph.genome_sequence import AnophelesGenomeSequenceData
3731
from .anoph.hap_data import AnophelesHapData, hap_params
@@ -87,8 +81,6 @@ class AnophelesDataResource(
8781
AnophelesH12Analysis,
8882
AnophelesG123Analysis,
8983
AnophelesFstAnalysis,
90-
AnophelesCnvFrequencyAnalysis,
91-
AnophelesSnpFrequencyAnalysis,
9284
AnophelesHapFrequencyAnalysis,
9385
AnophelesDistanceAnalysis,
9486
AnophelesPca,
@@ -97,7 +89,6 @@ class AnophelesDataResource(
9789
AnophelesAimData,
9890
AnophelesHapData,
9991
AnophelesSnpData,
100-
AnophelesCnvData,
10192
AnophelesSampleMetadata,
10293
AnophelesGenomeFeaturesData,
10394
AnophelesGenomeSequenceData,

tests/anoph/test_dipclust.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,14 @@
77
from malariagen_data.anoph.dipclust import AnophelesDipClustAnalysis
88

99

10+
def random_transcripts_contig(*, api, contig, n):
    """Pick ``n`` distinct transcript IDs at random from a single contig.

    Parameters
    ----------
    api
        Object exposing ``genome_features(attributes=...)``, which must
        return a data frame with ``type``, ``contig`` and ``ID`` columns.
    contig
        Name of the contig whose transcripts are sampled.
    n
        Number of transcript IDs to draw (without replacement).

    Returns
    -------
    list
        ``n`` transcript IDs taken from the ``mRNA`` features on ``contig``.

    Raises
    ------
    ValueError
        If ``contig`` has fewer than ``n`` transcripts with a non-missing ID
        (or if ``n`` is negative, as reported by ``random.sample``).
    """
    df_gff = api.genome_features(attributes=["ID", "Parent"])

    # Select rows with boolean masks rather than a formatted query string, so
    # that a contig name containing quotes cannot corrupt the expression.
    is_transcript = (df_gff["type"] == "mRNA") & (df_gff["contig"] == contig)
    transcript_ids = df_gff.loc[is_transcript, "ID"].dropna().to_list()

    # Fail with an actionable message instead of the generic
    # "Sample larger than population" error raised by random.sample.
    if n > len(transcript_ids):
        raise ValueError(
            f"Cannot sample {n} transcripts from contig {contig!r}: "
            f"only {len(transcript_ids)} available."
        )

    return random.sample(transcript_ids, n)
16+
17+
1018
@pytest.fixture
1119
def ag3_sim_api(ag3_sim_fixture):
1220
return AnophelesDipClustAnalysis(
@@ -98,3 +106,97 @@ def test_plot_diplotype_clustering(
98106

99107
# Run checks.
100108
api.plot_diplotype_clustering(**dipclust_params)
109+
110+
111+
@pytest.mark.parametrize("distance_metric", ["cityblock", "euclidean"])
@parametrize_with_cases("fixture,api", cases=".")
def test_plot_diplotype_clustering_advanced(
    fixture, api: AnophelesDipClustAnalysis, distance_metric
):
    """Smoke-test the advanced diplotype clustering plot per distance metric."""
    # Pools from which the randomised arguments are drawn.
    sample_set_names = api.sample_sets()["sample_set"].to_list()
    linkage_choices = (
        "single",
        "complete",
        "average",
        "weighted",
        "centroid",
        "median",
        "ward",
    )
    query_choices = (None, "sex_call == 'F'")

    # Draw the random values in the same order as the keyword arguments below.
    region = fixture.random_region_str(region_size=5000)
    sample_set = random.choice(sample_set_names)
    linkage_method = random.choice(linkage_choices)
    sample_query = random.choice(query_choices)

    # The test passes as long as the plotting call completes without raising.
    api.plot_diplotype_clustering_advanced(
        region=region,
        sample_sets=[sample_set],
        linkage_method=linkage_method,
        distance_metric=distance_metric,
        sample_query=sample_query,
        show=False,
    )
139+
140+
141+
@pytest.mark.parametrize("n", [1, 2])
@parametrize_with_cases("fixture,api", cases=".")
def test_plot_diplotype_clustering_advanced_with_transcript(
    fixture, api: AnophelesDipClustAnalysis, n
):
    """Smoke-test the advanced plot when ``n`` SNP transcripts are supplied."""
    # Choose a contig, then sample the transcripts to annotate from it.
    contig = fixture.random_contig()
    transcripts = random_transcripts_contig(api=api, contig=contig, n=n)

    # Pools from which the remaining randomised arguments are drawn.
    sample_set_names = api.sample_sets()["sample_set"].to_list()
    linkage_choices = (
        "single",
        "complete",
        "average",
        "weighted",
        "centroid",
        "median",
        "ward",
    )
    query_choices = (None, "sex_call == 'F'")

    # Draw the random values in the same order as the keyword arguments below.
    sample_set = random.choice(sample_set_names)
    linkage_method = random.choice(linkage_choices)
    sample_query = random.choice(query_choices)

    # The test passes as long as the plotting call completes without raising.
    api.plot_diplotype_clustering_advanced(
        region=contig,
        snp_transcripts=transcripts,
        sample_sets=[sample_set],
        linkage_method=linkage_method,
        distance_metric="cityblock",
        sample_query=sample_query,
        show=False,
    )
172+
173+
174+
@parametrize_with_cases("fixture,api", cases=".")
def test_plot_diplotype_clustering_advanced_with_cnv_region(
    fixture, api: AnophelesDipClustAnalysis
):
    """Smoke-test the advanced plot when a CNV region is supplied."""
    # The same random region is used for both the SNP and the CNV data.
    region = fixture.random_region_str(region_size=5000)

    # Pools from which the remaining randomised arguments are drawn.
    sample_set_names = api.sample_sets()["sample_set"].to_list()
    linkage_choices = (
        "single",
        "complete",
        "average",
        "weighted",
        "centroid",
        "median",
        "ward",
    )
    query_choices = (None, "sex_call == 'F'")

    # Draw the random values in the same order as the keyword arguments below.
    sample_set = random.choice(sample_set_names)
    linkage_method = random.choice(linkage_choices)
    sample_query = random.choice(query_choices)

    # The test passes as long as the plotting call completes without raising.
    api.plot_diplotype_clustering_advanced(
        region=region,
        cnv_region=region,
        sample_sets=[sample_set],
        linkage_method=linkage_method,
        distance_metric="cityblock",
        sample_query=sample_query,
        show=False,
    )

0 commit comments

Comments
 (0)