Skip to content

Commit 20efee0

Browse files
jonbrenasleehartAlistair Milesalimanfoo
authored
Plot H12 GWSS traces for multiple cohorts together in a single figure (#595)
* First draft for multiple traces of H12. There are still things that I am not too happy about. * Forgot one of the shared params. * Solving linting issues * Solving more linting issues * Trying to get around assertions. * Trying to get around linting. * Trying to get around linting. * Trying to make the params for the multiple tracks make sense * Corrected a typo * More arguments moved to sample queries * Forgot a few selves * Started working on the docs * Followed Alistair's advices (I think). Still need to add tests and a better way to handle titles. * Messed up the type definition of window_sizes * Added some tests and made some little changes * Improved the def of window_size * add examples of multi plots * Update malariagen_data/anoph/h12.py Co-authored-by: Alistair Miles <[email protected]> * Update malariagen_data/anoph/h12.py Co-authored-by: Alistair Miles <[email protected]> * Update malariagen_data/anoph/h12.py Co-authored-by: Alistair Miles <[email protected]> * Update malariagen_data/anoph/h12.py Co-authored-by: Alistair Miles <[email protected]> * Update malariagen_data/anoph/h12_params.py Co-authored-by: Alistair Miles <[email protected]> * Hoping a tab might solve the issue. * add example using cohorts and window_size as dict * Update malariagen_data/anoph/h12.py Co-authored-by: Alistair Miles <[email protected]> * Update malariagen_data/anoph/h12.py Co-authored-by: Alistair Miles <[email protected]> * Update malariagen_data/anoph/h12.py Co-authored-by: Alistair Miles <[email protected]> * Update h12.py * More cohorts for the H12 tests * Reorganized the tests * Added a test for the dict version of window_size * Update malariagen_data/anoph/h12.py * add h12 multi functions to API docs --------- Co-authored-by: Lee <[email protected]> Co-authored-by: Alistair Miles <[email protected]> Co-authored-by: Alistair Miles <[email protected]>
1 parent eb6693e commit 20efee0

File tree

7 files changed

+578
-14
lines changed

7 files changed

+578
-14
lines changed

docs/source/Af1.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,8 @@ Genome-wide selection scans
164164
plot_h12_calibration
165165
h12_gwss
166166
plot_h12_gwss
167+
plot_h12_gwss_multi_panel
168+
plot_h12_gwss_multi_overlay
167169
h1x_gwss
168170
plot_h1x_gwss
169171
g123_calibration

docs/source/Ag3.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,8 @@ Genome-wide selection scans
174174
plot_h12_calibration
175175
h12_gwss
176176
plot_h12_gwss
177+
plot_h12_gwss_multi_panel
178+
plot_h12_gwss_multi_overlay
177179
h1x_gwss
178180
plot_h1x_gwss
179181
g123_calibration

malariagen_data/anoph/gplt_params.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""Parameters for genome plotting functions. N.B., genome plots are always
22
plotted with bokeh."""
33

4-
from typing import Literal, Mapping, Optional, Union
4+
from typing import Literal, Mapping, Optional, Union, Sequence
55

66
import bokeh.models
77
from typing_extensions import Annotated, TypeAlias
@@ -83,6 +83,14 @@
8383
"A bokeh figure (only returned if show=False).",
8484
]
8585

86+
def_figure: TypeAlias = Annotated[
87+
# Use quite a broad type here to accommodate both single-panel figures
88+
# created via bokeh.plotting and multi-panel figures created via
89+
# bokeh.layouts.
90+
bokeh.model.Model,
91+
"A bokeh figure.",
92+
]
93+
8694
output_backend: TypeAlias = Annotated[
8795
Literal["canvas", "webgl", "svg"],
8896
"""
@@ -103,3 +111,5 @@
103111
Mapping,
104112
"Passed through to bokeh line() function.",
105113
]
114+
115+
colors: TypeAlias = Annotated[Sequence[str], "List of colors."]

malariagen_data/anoph/h12.py

Lines changed: 301 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,307 @@ def plot_h12_gwss(
514514
else:
515515
return fig
516516

517+
@check_types
518+
@doc(
519+
summary="Plot h12 GWSS data track with multiple traces overlaid.",
520+
)
521+
def plot_h12_gwss_multi_overlay_track(
522+
self,
523+
contig: base_params.contig,
524+
cohorts: base_params.cohorts,
525+
window_size: h12_params.multi_window_size,
526+
cohort_size: Optional[base_params.cohort_size] = h12_params.cohort_size_default,
527+
sample_query: Optional[base_params.sample_query] = None,
528+
analysis: hap_params.analysis = base_params.DEFAULT,
529+
min_cohort_size: Optional[
530+
base_params.min_cohort_size
531+
] = h12_params.min_cohort_size_default,
532+
max_cohort_size: Optional[
533+
base_params.max_cohort_size
534+
] = h12_params.max_cohort_size_default,
535+
title: Optional[gplt_params.title] = None,
536+
sample_query_options: Optional[base_params.sample_query_options] = None,
537+
sample_sets: Optional[base_params.sample_sets] = None,
538+
colors: gplt_params.colors = bokeh.palettes.d3["Category10"][10],
539+
random_seed: base_params.random_seed = 42,
540+
sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default,
541+
width: gplt_params.width = gplt_params.width_default,
542+
height: gplt_params.height = 200,
543+
show: gplt_params.show = True,
544+
x_range: Optional[gplt_params.x_range] = None,
545+
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
546+
) -> gplt_params.figure:
547+
cohort_queries = self._setup_cohort_queries(
548+
cohorts=cohorts,
549+
sample_sets=sample_sets,
550+
sample_query=sample_query,
551+
sample_query_options=sample_query_options,
552+
cohort_size=cohort_size,
553+
min_cohort_size=None,
554+
)
555+
556+
if isinstance(window_size, int):
557+
window_size = {k: window_size for k in cohort_queries.keys()}
558+
elif isinstance(window_size, Mapping):
559+
if set(window_size.keys()) != set(cohort_queries.keys()):
560+
raise ValueError("Cohorts and window_sizes should have the same keys.")
561+
562+
# Compute H12.
563+
res = {}
564+
for cohort_label, cohort_query in cohort_queries.items():
565+
res[cohort_label] = self.h12_gwss(
566+
contig=contig,
567+
analysis=analysis,
568+
window_size=window_size[cohort_label],
569+
cohort_size=cohort_size,
570+
min_cohort_size=min_cohort_size,
571+
max_cohort_size=max_cohort_size,
572+
sample_query=cohort_query,
573+
sample_sets=sample_sets,
574+
random_seed=random_seed,
575+
)
576+
577+
# Determine X axis range.
578+
x, _ = res[list(cohort_queries.keys())[0]]
579+
x_min = x[0]
580+
x_max = x[-1]
581+
if x_range is None:
582+
x_range = bokeh.models.Range1d(x_min, x_max, bounds="auto")
583+
584+
# Create a figure.
585+
xwheel_zoom = bokeh.models.WheelZoomTool(
586+
dimensions="width", maintain_focus=False
587+
)
588+
589+
fig = bokeh.plotting.figure(
590+
title=title,
591+
tools=[
592+
"xpan",
593+
"xzoom_in",
594+
"xzoom_out",
595+
xwheel_zoom,
596+
"reset",
597+
"save",
598+
"crosshair",
599+
],
600+
active_inspect=None,
601+
active_scroll=xwheel_zoom,
602+
active_drag="xpan",
603+
sizing_mode=sizing_mode,
604+
width=width,
605+
height=height,
606+
toolbar_location="above",
607+
x_range=x_range,
608+
y_range=(0, 1),
609+
output_backend=output_backend,
610+
)
611+
612+
# Plot H12.
613+
for i, (cohort_label, (x, h12)) in enumerate(res.items()):
614+
fig.scatter(
615+
x=x,
616+
y=h12,
617+
marker="circle",
618+
size=3,
619+
line_width=1,
620+
line_color=colors[i % len(colors)],
621+
fill_color=None,
622+
legend_label=cohort_label,
623+
)
624+
625+
# Tidy up the plot.
626+
fig.yaxis.axis_label = "H12"
627+
fig.yaxis.ticker = [0, 1]
628+
self._bokeh_style_genome_xaxis(fig, contig)
629+
630+
if show: # pragma: no cover
631+
bokeh.plotting.show(fig)
632+
return None
633+
else:
634+
return fig
635+
636+
@check_types
637+
@doc(
638+
summary="Plot h12 GWSS data with multiple traces overlaid.",
639+
)
640+
def plot_h12_gwss_multi_overlay(
641+
self,
642+
contig: base_params.contig,
643+
cohorts: base_params.cohorts,
644+
window_size: h12_params.multi_window_size,
645+
cohort_size: Optional[base_params.cohort_size] = h12_params.cohort_size_default,
646+
sample_query: Optional[base_params.sample_query] = None,
647+
analysis: hap_params.analysis = base_params.DEFAULT,
648+
min_cohort_size: Optional[
649+
base_params.min_cohort_size
650+
] = h12_params.min_cohort_size_default,
651+
max_cohort_size: Optional[
652+
base_params.max_cohort_size
653+
] = h12_params.max_cohort_size_default,
654+
sample_query_options: Optional[base_params.sample_query_options] = None,
655+
sample_sets: Optional[base_params.sample_sets] = None,
656+
colors: gplt_params.colors = bokeh.palettes.d3["Category10"][10],
657+
random_seed: base_params.random_seed = 42,
658+
title: Optional[gplt_params.title] = None,
659+
sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default,
660+
width: gplt_params.width = gplt_params.width_default,
661+
track_height: gplt_params.track_height = 170,
662+
genes_height: gplt_params.genes_height = gplt_params.genes_height_default,
663+
show: gplt_params.show = True,
664+
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
665+
) -> gplt_params.figure:
666+
# Plot GWSS track.
667+
fig1 = self.plot_h12_gwss_multi_overlay_track(
668+
contig=contig,
669+
sample_query=sample_query,
670+
cohorts=cohorts,
671+
cohort_size=cohort_size,
672+
window_size=window_size,
673+
analysis=analysis,
674+
min_cohort_size=min_cohort_size,
675+
max_cohort_size=max_cohort_size,
676+
sample_query_options=sample_query_options,
677+
sample_sets=sample_sets,
678+
colors=colors,
679+
random_seed=random_seed,
680+
title=title,
681+
sizing_mode=sizing_mode,
682+
width=width,
683+
height=track_height,
684+
show=False,
685+
output_backend=output_backend,
686+
)
687+
688+
fig1.xaxis.visible = False
689+
fig1.legend.location = "top_right"
690+
fig1.legend.click_policy = "hide"
691+
692+
# Plot genes.
693+
fig2 = self.plot_genes(
694+
region=contig,
695+
sizing_mode=sizing_mode,
696+
width=width,
697+
height=genes_height,
698+
x_range=fig1.x_range,
699+
show=False,
700+
output_backend=output_backend,
701+
)
702+
703+
# Combine plots into a single figure.
704+
fig = bokeh.layouts.gridplot(
705+
[fig1, fig2],
706+
ncols=1,
707+
toolbar_location="above",
708+
merge_tools=True,
709+
sizing_mode=sizing_mode,
710+
toolbar_options=dict(active_inspect=None),
711+
)
712+
713+
if show: # pragma: no cover
714+
bokeh.plotting.show(fig)
715+
return None
716+
else:
717+
return fig
718+
719+
@check_types
720+
@doc(
721+
summary="Plot h12 GWSS data with multiple tracks.",
722+
)
723+
def plot_h12_gwss_multi_panel(
724+
self,
725+
contig: base_params.contig,
726+
cohorts: base_params.cohorts,
727+
window_size: h12_params.multi_window_size,
728+
cohort_size: Optional[base_params.cohort_size] = h12_params.cohort_size_default,
729+
sample_query: Optional[base_params.sample_query] = None,
730+
analysis: hap_params.analysis = base_params.DEFAULT,
731+
min_cohort_size: Optional[
732+
base_params.min_cohort_size
733+
] = h12_params.min_cohort_size_default,
734+
max_cohort_size: Optional[
735+
base_params.max_cohort_size
736+
] = h12_params.max_cohort_size_default,
737+
sample_query_options: Optional[base_params.sample_query_options] = None,
738+
sample_sets: Optional[base_params.sample_sets] = None,
739+
random_seed: base_params.random_seed = 42,
740+
sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default,
741+
width: gplt_params.width = gplt_params.width_default,
742+
track_height: gplt_params.track_height = 170,
743+
genes_height: gplt_params.genes_height = gplt_params.genes_height_default,
744+
show: gplt_params.show = True,
745+
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
746+
) -> gplt_params.figure:
747+
cohort_queries = self._setup_cohort_queries(
748+
cohorts=cohorts,
749+
sample_sets=sample_sets,
750+
sample_query=sample_query,
751+
sample_query_options=sample_query_options,
752+
cohort_size=cohort_size,
753+
min_cohort_size=None,
754+
)
755+
756+
if isinstance(window_size, int):
757+
window_size = {k: window_size for k in cohort_queries.keys()}
758+
elif isinstance(window_size, Mapping):
759+
if set(window_size.keys()) != set(cohort_queries.keys()):
760+
raise ValueError("Cohorts and window_sizes should have the same keys.")
761+
762+
# Plot GWSS track.
763+
figs: list[gplt_params.def_figure] = []
764+
for i, (cohort_label, cohort_query) in enumerate(cohort_queries.items()):
765+
params = dict(
766+
contig=contig,
767+
analysis=analysis,
768+
window_size=window_size[cohort_label],
769+
sample_sets=sample_sets,
770+
sample_query=cohort_query,
771+
cohort_size=cohort_size,
772+
min_cohort_size=min_cohort_size,
773+
max_cohort_size=max_cohort_size,
774+
random_seed=random_seed,
775+
title=cohort_label, # Deal with a choice of titles later
776+
sizing_mode=sizing_mode,
777+
width=width,
778+
height=track_height,
779+
show=False,
780+
output_backend=output_backend,
781+
)
782+
if i > 0:
783+
track = self.plot_h12_gwss_track(x_range=figs[0].x_range, **params)
784+
else:
785+
track = self.plot_h12_gwss_track(**params)
786+
track.xaxis.visible = False
787+
figs.append(track)
788+
789+
# Plot genes.
790+
fig2 = self.plot_genes(
791+
region=contig,
792+
sizing_mode=sizing_mode,
793+
width=width,
794+
height=genes_height,
795+
x_range=figs[0].x_range,
796+
show=False,
797+
output_backend=output_backend,
798+
)
799+
800+
figs.append(fig2)
801+
802+
# Combine plots into a single figure.
803+
fig = bokeh.layouts.gridplot(
804+
figs,
805+
ncols=1,
806+
toolbar_location="above",
807+
merge_tools=True,
808+
sizing_mode=sizing_mode,
809+
toolbar_options=dict(active_inspect=None),
810+
)
811+
812+
if show: # pragma: no cover
813+
bokeh.plotting.show(fig)
814+
return None
815+
else:
816+
return fig
817+
517818

518819
def haplotype_frequencies(h):
519820
"""Compute haplotype frequencies, returning a dictionary that maps

malariagen_data/anoph/h12_params.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""Parameter definitions for H12 analysis functions."""
22

3-
from typing import Optional, Sequence
3+
from typing import Optional, Sequence, Union
44

55
from typing_extensions import Annotated, TypeAlias
66

@@ -22,6 +22,16 @@
2222
""",
2323
]
2424

25+
multi_window_size: TypeAlias = Annotated[
26+
Union[window_size, dict[str, int]],
27+
"""
28+
The size of windows (number of SNPs) used to calculate statistics within. Can
29+
be a single value, in which case the same window size will be used for all
30+
cohorts. Can also be a mapping from cohort identifiers to values, in case
31+
you need to provide different window sizes for different cohorts.
32+
""",
33+
]
34+
2535
cohort_size_default: Optional[base_params.cohort_size] = None
2636

2737
min_cohort_size_default: base_params.min_cohort_size = 15

0 commit comments

Comments
 (0)