Skip to content

Commit 0197957

Browse files
authored
Merge pull request #703 from malariagen/600-adding-option-for-multiple-transcripts-to-diplotype_clustering
Allow multiple SNP transcripts via snp_transcript in `plot_diplotype_clustering_advanced()`
2 parents 6ce9f11 + 450235b commit 0197957

File tree

5 files changed, with 201 additions and 6,074 deletions.

malariagen_data/anoph/dipclust.py

Lines changed: 69 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,16 @@
2323
cnv_params,
2424
)
2525
from .snp_frq import AnophelesSnpFrequencyAnalysis
26-
from .cnv_data import AnophelesCnvData
26+
from .cnv_frq import AnophelesCnvFrequencyAnalysis
2727

2828
AA_CHANGE_QUERY = (
2929
"effect in ['NON_SYNONYMOUS_CODING', 'START_LOST', 'STOP_LOST', 'STOP_GAINED']"
3030
)
3131

3232

33-
class AnophelesDipClustAnalysis(AnophelesSnpFrequencyAnalysis, AnophelesCnvData):
33+
class AnophelesDipClustAnalysis(
34+
AnophelesCnvFrequencyAnalysis, AnophelesSnpFrequencyAnalysis
35+
):
3436
def __init__(
3537
self,
3638
**kwargs,
@@ -190,7 +192,7 @@ def plot_diplotype_clustering(
190192
else:
191193
return {
192194
"figure": fig,
193-
"dendro_sample_id_order": leaf_data["sample_id"].to_list(),
195+
"dendro_sample_id_order": np.asarray(leaf_data["sample_id"].to_list()),
194196
"n_snps": n_snps_used,
195197
}
196198

@@ -319,7 +321,7 @@ def _dipclust_het_bar_trace(
319321
sample_sets: Optional[base_params.sample_sets],
320322
sample_query: Optional[base_params.sample_query],
321323
sample_query_options: Optional[base_params.sample_query_options],
322-
site_mask: base_params.site_mask,
324+
site_mask: Optional[base_params.site_mask],
323325
cohort_size: Optional[base_params.cohort_size],
324326
random_seed: base_params.random_seed,
325327
color_continuous_scale: Optional[plotly_params.color_continuous_scale],
@@ -547,11 +549,52 @@ def _dipclust_concat_subplots(
547549

548550
return fig
549551

552+
def _insert_dipclust_snp_trace(
553+
self,
554+
*,
555+
figures,
556+
subplot_heights,
557+
snp_row_height: plotly_params.height = 25,
558+
transcript: base_params.transcript,
559+
snp_query: Optional[base_params.snp_query] = AA_CHANGE_QUERY,
560+
sample_sets: Optional[base_params.sample_sets],
561+
sample_query: Optional[base_params.sample_query],
562+
sample_query_options: Optional[base_params.sample_query_options],
563+
site_mask: Optional[base_params.site_mask],
564+
dendro_sample_id_order: np.ndarray,
565+
snp_filter_min_maf: float,
566+
snp_colorscale: Optional[plotly_params.color_continuous_scale],
567+
chunks: base_params.chunks = base_params.native_chunks,
568+
inline_array: base_params.inline_array = base_params.inline_array_default,
569+
):
570+
snp_trace, n_snps_transcript = self._dipclust_snp_trace(
571+
transcript=transcript,
572+
sample_sets=sample_sets,
573+
sample_query=sample_query,
574+
sample_query_options=sample_query_options,
575+
snp_query=snp_query,
576+
site_mask=site_mask,
577+
dendro_sample_id_order=dendro_sample_id_order,
578+
snp_filter_min_maf=snp_filter_min_maf,
579+
snp_colorscale=snp_colorscale,
580+
chunks=chunks,
581+
inline_array=inline_array,
582+
)
583+
584+
if snp_trace:
585+
figures.append(snp_trace)
586+
subplot_heights.append(snp_row_height * n_snps_transcript)
587+
else:
588+
print(
589+
f"No SNPs were found below {snp_filter_min_maf} allele frequency. Omitting SNP genotype plot."
590+
)
591+
return figures, subplot_heights
592+
550593
@doc(
551594
summary="Perform diplotype clustering, annotated with heterozygosity, gene copy number and amino acid variants.",
552595
parameters=dict(
553596
heterozygosity="Plot heterozygosity track.",
554-
snp_transcript="Plot amino acid variants for this transcript.",
597+
snp_transcript="Plot amino acid variants for these transcripts.",
555598
cnv_region="Plot gene CNV calls for this region.",
556599
snp_filter_min_maf="Filter amino acid variants with alternate allele frequency below this threshold.",
557600
),
@@ -561,7 +604,7 @@ def plot_diplotype_clustering_advanced(
561604
region: base_params.regions,
562605
heterozygosity: bool = True,
563606
heterozygosity_colorscale: plotly_params.color_continuous_scale = "Greys",
564-
snp_transcript: Optional[base_params.transcript] = None,
607+
snp_transcript: Optional[dipclust_params.snp_transcript] = None,
565608
snp_colorscale: plotly_params.color_continuous_scale = "Greys",
566609
snp_filter_min_maf: float = 0.05,
567610
snp_query: Optional[base_params.snp_query] = AA_CHANGE_QUERY,
@@ -682,9 +725,11 @@ def plot_diplotype_clustering_advanced(
682725
figures.append(cnv_trace)
683726
subplot_heights.append(cnv_row_height * n_cnv_genes)
684727

685-
if snp_transcript:
686-
snp_trace, n_snps_transcript = self._dipclust_snp_trace(
728+
if isinstance(snp_transcript, str):
729+
figures, subplot_heights = self._insert_dipclust_snp_trace(
687730
transcript=snp_transcript,
731+
figures=figures,
732+
subplot_heights=subplot_heights,
688733
sample_sets=sample_sets,
689734
sample_query=sample_query,
690735
sample_query_options=sample_query_options,
@@ -696,13 +741,22 @@ def plot_diplotype_clustering_advanced(
696741
chunks=chunks,
697742
inline_array=inline_array,
698743
)
699-
700-
if snp_trace:
701-
figures.append(snp_trace)
702-
subplot_heights.append(snp_row_height * n_snps_transcript)
703-
else:
704-
print(
705-
f"No SNPs were found below {snp_filter_min_maf} allele frequency. Omitting SNP genotype plot."
744+
elif isinstance(snp_transcript, list):
745+
for st in snp_transcript:
746+
figures, subplot_heights = self._insert_dipclust_snp_trace(
747+
transcript=st,
748+
figures=figures,
749+
subplot_heights=subplot_heights,
750+
sample_sets=sample_sets,
751+
sample_query=sample_query,
752+
sample_query_options=sample_query_options,
753+
snp_query=snp_query,
754+
site_mask=site_mask,
755+
dendro_sample_id_order=dendro_sample_id_order,
756+
snp_filter_min_maf=snp_filter_min_maf,
757+
snp_colorscale=snp_colorscale,
758+
chunks=chunks,
759+
inline_array=inline_array,
706760
)
707761

708762
# Calculate total height based on subplot heights, plus a fixed
malariagen_data/anoph/dipclust_params.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,17 @@
11
"""Parameters for diplotype clustering functions."""
22

3+
from typing_extensions import Annotated, TypeAlias, Union, Sequence
4+
35
from .distance_params import distance_metric
46
from .clustering_params import linkage_method
7+
from .base_params import transcript
58

69

710
linkage_method_default: linkage_method = "complete"
811

912
distance_metric_default: distance_metric = "cityblock"
13+
14+
snp_transcript: TypeAlias = Annotated[
15+
Union[transcript, Sequence[transcript]],
16+
"A transcript or a list of transcripts",
17+
]

malariagen_data/anopheles.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,6 @@
1212
import plotly.graph_objects as go # type: ignore
1313
from numpydoc_decorator import doc # type: ignore
1414

15-
from malariagen_data.anoph.snp_frq import (
16-
AnophelesSnpFrequencyAnalysis,
17-
)
18-
19-
from .anoph.cnv_frq import AnophelesCnvFrequencyAnalysis
2015

2116
from .anoph import (
2217
aim_params,
@@ -32,7 +27,6 @@
3227
from .anoph.karyotype import AnophelesKaryotypeAnalysis
3328
from .anoph.aim_data import AnophelesAimData
3429
from .anoph.base import AnophelesBase
35-
from .anoph.cnv_data import AnophelesCnvData
3630
from .anoph.genome_features import AnophelesGenomeFeaturesData
3731
from .anoph.genome_sequence import AnophelesGenomeSequenceData
3832
from .anoph.hap_data import AnophelesHapData, hap_params
@@ -88,8 +82,6 @@ class AnophelesDataResource(
8882
AnophelesH12Analysis,
8983
AnophelesG123Analysis,
9084
AnophelesFstAnalysis,
91-
AnophelesCnvFrequencyAnalysis,
92-
AnophelesSnpFrequencyAnalysis,
9385
AnophelesHapFrequencyAnalysis,
9486
AnophelesDistanceAnalysis,
9587
AnophelesPca,
@@ -99,7 +91,6 @@ class AnophelesDataResource(
9991
AnophelesAimData,
10092
AnophelesHapData,
10193
AnophelesSnpData,
102-
AnophelesCnvData,
10394
AnophelesSampleMetadata,
10495
AnophelesGenomeFeaturesData,
10596
AnophelesGenomeSequenceData,

0 commit comments

Comments
 (0)