Skip to content

Commit 7b08e61

Browse files
authored
Merge branch 'master' into 583-GWSS-colors
2 parents 05a0fcc + e1944db commit 7b08e61

File tree

13 files changed

+932
-364
lines changed

13 files changed

+932
-364
lines changed

docs/source/Af1.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,8 @@ Genome-wide selection scans
164164
plot_h12_calibration
165165
h12_gwss
166166
plot_h12_gwss
167+
plot_h12_gwss_multi_panel
168+
plot_h12_gwss_multi_overlay
167169
h1x_gwss
168170
plot_h1x_gwss
169171
g123_calibration

docs/source/Ag3.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,8 @@ Genome-wide selection scans
174174
plot_h12_calibration
175175
h12_gwss
176176
plot_h12_gwss
177+
plot_h12_gwss_multi_panel
178+
plot_h12_gwss_multi_overlay
177179
h1x_gwss
178180
plot_h1x_gwss
179181
g123_calibration

malariagen_data/anoph/distance.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -367,8 +367,13 @@ def _njt(
367367
# neighbour-joining iterations.
368368
progress_options = dict(desc="Construct neighbour-joining tree", leave=False)
369369

370-
# Decide which algorithm to use and run the neighbour-joining.
371-
if algorithm == "rapid":
370+
# Decide which algorithm to use and run the neighbour-joining. The "dynamic"
371+
# algorithm is fastest.
372+
if algorithm == "dynamic":
373+
Z = anjl.dynamic_nj(
374+
D=D, progress=self._progress, progress_options=progress_options
375+
)
376+
elif algorithm == "rapid":
372377
Z = anjl.rapid_nj(
373378
D=D, progress=self._progress, progress_options=progress_options
374379
)

malariagen_data/anoph/distance_params.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
default_distance_metric: distance_metric = "cityblock"
1515

1616
nj_algorithm: TypeAlias = Annotated[
17-
Literal["rapid", "canonical"],
18-
"Neighbour-joining algorithm to use.",
17+
Literal["dynamic", "rapid", "canonical"],
18+
"Neighbour-joining algorithm to use. The 'dynamic' algorithm is fastest.",
1919
]
2020

21-
default_nj_algorithm: nj_algorithm = "rapid"
21+
default_nj_algorithm: nj_algorithm = "dynamic"

malariagen_data/anoph/genome_features.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,10 +149,11 @@ def genome_features(
149149
df_part = df_part.query(f"end >= {r.start}")
150150
parts.append(df_part)
151151
df = pd.concat(parts, axis=0)
152-
return df.reset_index(drop=True).copy()
152+
return df.sort_values(["contig", "start"]).reset_index(drop=True).copy()
153153

154154
return (
155155
self._genome_features(attributes=attributes_normed)
156+
.sort_values(["contig", "start"])
156157
.reset_index(drop=True)
157158
.copy()
158159
)

malariagen_data/anoph/gplt_params.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,14 @@
8383
"A bokeh figure (only returned if show=False).",
8484
]
8585

86+
def_figure: TypeAlias = Annotated[
87+
# Use quite a broad type here to accommodate both single-panel figures
88+
# created via bokeh.plotting and multi-panel figures created via
89+
# bokeh.layouts.
90+
bokeh.model.Model,
91+
"A bokeh figure.",
92+
]
93+
8694
output_backend: TypeAlias = Annotated[
8795
Literal["canvas", "webgl", "svg"],
8896
"""

malariagen_data/anoph/h12.py

Lines changed: 301 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,307 @@ def plot_h12_gwss(
526526
else:
527527
return fig
528528

529+
@check_types
530+
@doc(
531+
summary="Plot h12 GWSS data track with multiple traces overlaid.",
532+
)
533+
def plot_h12_gwss_multi_overlay_track(
534+
self,
535+
contig: base_params.contig,
536+
cohorts: base_params.cohorts,
537+
window_size: h12_params.multi_window_size,
538+
cohort_size: Optional[base_params.cohort_size] = h12_params.cohort_size_default,
539+
sample_query: Optional[base_params.sample_query] = None,
540+
analysis: hap_params.analysis = base_params.DEFAULT,
541+
min_cohort_size: Optional[
542+
base_params.min_cohort_size
543+
] = h12_params.min_cohort_size_default,
544+
max_cohort_size: Optional[
545+
base_params.max_cohort_size
546+
] = h12_params.max_cohort_size_default,
547+
title: Optional[gplt_params.title] = None,
548+
sample_query_options: Optional[base_params.sample_query_options] = None,
549+
sample_sets: Optional[base_params.sample_sets] = None,
550+
colors: gplt_params.colors = bokeh.palettes.d3["Category10"][10],
551+
random_seed: base_params.random_seed = 42,
552+
sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default,
553+
width: gplt_params.width = gplt_params.width_default,
554+
height: gplt_params.height = 200,
555+
show: gplt_params.show = True,
556+
x_range: Optional[gplt_params.x_range] = None,
557+
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
558+
) -> gplt_params.figure:
559+
cohort_queries = self._setup_cohort_queries(
560+
cohorts=cohorts,
561+
sample_sets=sample_sets,
562+
sample_query=sample_query,
563+
sample_query_options=sample_query_options,
564+
cohort_size=cohort_size,
565+
min_cohort_size=None,
566+
)
567+
568+
if isinstance(window_size, int):
569+
window_size = {k: window_size for k in cohort_queries.keys()}
570+
elif isinstance(window_size, Mapping):
571+
if set(window_size.keys()) != set(cohort_queries.keys()):
572+
raise ValueError("Cohorts and window_sizes should have the same keys.")
573+
574+
# Compute H12.
575+
res = {}
576+
for cohort_label, cohort_query in cohort_queries.items():
577+
res[cohort_label] = self.h12_gwss(
578+
contig=contig,
579+
analysis=analysis,
580+
window_size=window_size[cohort_label],
581+
cohort_size=cohort_size,
582+
min_cohort_size=min_cohort_size,
583+
max_cohort_size=max_cohort_size,
584+
sample_query=cohort_query,
585+
sample_sets=sample_sets,
586+
random_seed=random_seed,
587+
)
588+
589+
# Determine X axis range.
590+
x, _ = res[list(cohort_queries.keys())[0]]
591+
x_min = x[0]
592+
x_max = x[-1]
593+
if x_range is None:
594+
x_range = bokeh.models.Range1d(x_min, x_max, bounds="auto")
595+
596+
# Create a figure.
597+
xwheel_zoom = bokeh.models.WheelZoomTool(
598+
dimensions="width", maintain_focus=False
599+
)
600+
601+
fig = bokeh.plotting.figure(
602+
title=title,
603+
tools=[
604+
"xpan",
605+
"xzoom_in",
606+
"xzoom_out",
607+
xwheel_zoom,
608+
"reset",
609+
"save",
610+
"crosshair",
611+
],
612+
active_inspect=None,
613+
active_scroll=xwheel_zoom,
614+
active_drag="xpan",
615+
sizing_mode=sizing_mode,
616+
width=width,
617+
height=height,
618+
toolbar_location="above",
619+
x_range=x_range,
620+
y_range=(0, 1),
621+
output_backend=output_backend,
622+
)
623+
624+
# Plot H12.
625+
for i, (cohort_label, (x, h12)) in enumerate(res.items()):
626+
fig.scatter(
627+
x=x,
628+
y=h12,
629+
marker="circle",
630+
size=3,
631+
line_width=1,
632+
line_color=colors[i % len(colors)],
633+
fill_color=None,
634+
legend_label=cohort_label,
635+
)
636+
637+
# Tidy up the plot.
638+
fig.yaxis.axis_label = "H12"
639+
fig.yaxis.ticker = [0, 1]
640+
self._bokeh_style_genome_xaxis(fig, contig)
641+
642+
if show: # pragma: no cover
643+
bokeh.plotting.show(fig)
644+
return None
645+
else:
646+
return fig
647+
648+
@check_types
649+
@doc(
650+
summary="Plot h12 GWSS data with multiple traces overlaid.",
651+
)
652+
def plot_h12_gwss_multi_overlay(
653+
self,
654+
contig: base_params.contig,
655+
cohorts: base_params.cohorts,
656+
window_size: h12_params.multi_window_size,
657+
cohort_size: Optional[base_params.cohort_size] = h12_params.cohort_size_default,
658+
sample_query: Optional[base_params.sample_query] = None,
659+
analysis: hap_params.analysis = base_params.DEFAULT,
660+
min_cohort_size: Optional[
661+
base_params.min_cohort_size
662+
] = h12_params.min_cohort_size_default,
663+
max_cohort_size: Optional[
664+
base_params.max_cohort_size
665+
] = h12_params.max_cohort_size_default,
666+
sample_query_options: Optional[base_params.sample_query_options] = None,
667+
sample_sets: Optional[base_params.sample_sets] = None,
668+
colors: gplt_params.colors = bokeh.palettes.d3["Category10"][10],
669+
random_seed: base_params.random_seed = 42,
670+
title: Optional[gplt_params.title] = None,
671+
sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default,
672+
width: gplt_params.width = gplt_params.width_default,
673+
track_height: gplt_params.track_height = 170,
674+
genes_height: gplt_params.genes_height = gplt_params.genes_height_default,
675+
show: gplt_params.show = True,
676+
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
677+
) -> gplt_params.figure:
678+
# Plot GWSS track.
679+
fig1 = self.plot_h12_gwss_multi_overlay_track(
680+
contig=contig,
681+
sample_query=sample_query,
682+
cohorts=cohorts,
683+
cohort_size=cohort_size,
684+
window_size=window_size,
685+
analysis=analysis,
686+
min_cohort_size=min_cohort_size,
687+
max_cohort_size=max_cohort_size,
688+
sample_query_options=sample_query_options,
689+
sample_sets=sample_sets,
690+
colors=colors,
691+
random_seed=random_seed,
692+
title=title,
693+
sizing_mode=sizing_mode,
694+
width=width,
695+
height=track_height,
696+
show=False,
697+
output_backend=output_backend,
698+
)
699+
700+
fig1.xaxis.visible = False
701+
fig1.legend.location = "top_right"
702+
fig1.legend.click_policy = "hide"
703+
704+
# Plot genes.
705+
fig2 = self.plot_genes(
706+
region=contig,
707+
sizing_mode=sizing_mode,
708+
width=width,
709+
height=genes_height,
710+
x_range=fig1.x_range,
711+
show=False,
712+
output_backend=output_backend,
713+
)
714+
715+
# Combine plots into a single figure.
716+
fig = bokeh.layouts.gridplot(
717+
[fig1, fig2],
718+
ncols=1,
719+
toolbar_location="above",
720+
merge_tools=True,
721+
sizing_mode=sizing_mode,
722+
toolbar_options=dict(active_inspect=None),
723+
)
724+
725+
if show: # pragma: no cover
726+
bokeh.plotting.show(fig)
727+
return None
728+
else:
729+
return fig
730+
731+
@check_types
732+
@doc(
733+
summary="Plot h12 GWSS data with multiple tracks.",
734+
)
735+
def plot_h12_gwss_multi_panel(
736+
self,
737+
contig: base_params.contig,
738+
cohorts: base_params.cohorts,
739+
window_size: h12_params.multi_window_size,
740+
cohort_size: Optional[base_params.cohort_size] = h12_params.cohort_size_default,
741+
sample_query: Optional[base_params.sample_query] = None,
742+
analysis: hap_params.analysis = base_params.DEFAULT,
743+
min_cohort_size: Optional[
744+
base_params.min_cohort_size
745+
] = h12_params.min_cohort_size_default,
746+
max_cohort_size: Optional[
747+
base_params.max_cohort_size
748+
] = h12_params.max_cohort_size_default,
749+
sample_query_options: Optional[base_params.sample_query_options] = None,
750+
sample_sets: Optional[base_params.sample_sets] = None,
751+
random_seed: base_params.random_seed = 42,
752+
sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default,
753+
width: gplt_params.width = gplt_params.width_default,
754+
track_height: gplt_params.track_height = 170,
755+
genes_height: gplt_params.genes_height = gplt_params.genes_height_default,
756+
show: gplt_params.show = True,
757+
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
758+
) -> gplt_params.figure:
759+
cohort_queries = self._setup_cohort_queries(
760+
cohorts=cohorts,
761+
sample_sets=sample_sets,
762+
sample_query=sample_query,
763+
sample_query_options=sample_query_options,
764+
cohort_size=cohort_size,
765+
min_cohort_size=None,
766+
)
767+
768+
if isinstance(window_size, int):
769+
window_size = {k: window_size for k in cohort_queries.keys()}
770+
elif isinstance(window_size, Mapping):
771+
if set(window_size.keys()) != set(cohort_queries.keys()):
772+
raise ValueError("Cohorts and window_sizes should have the same keys.")
773+
774+
# Plot GWSS track.
775+
figs: list[gplt_params.def_figure] = []
776+
for i, (cohort_label, cohort_query) in enumerate(cohort_queries.items()):
777+
params = dict(
778+
contig=contig,
779+
analysis=analysis,
780+
window_size=window_size[cohort_label],
781+
sample_sets=sample_sets,
782+
sample_query=cohort_query,
783+
cohort_size=cohort_size,
784+
min_cohort_size=min_cohort_size,
785+
max_cohort_size=max_cohort_size,
786+
random_seed=random_seed,
787+
title=cohort_label, # Deal with a choice of titles later
788+
sizing_mode=sizing_mode,
789+
width=width,
790+
height=track_height,
791+
show=False,
792+
output_backend=output_backend,
793+
)
794+
if i > 0:
795+
track = self.plot_h12_gwss_track(x_range=figs[0].x_range, **params)
796+
else:
797+
track = self.plot_h12_gwss_track(**params)
798+
track.xaxis.visible = False
799+
figs.append(track)
800+
801+
# Plot genes.
802+
fig2 = self.plot_genes(
803+
region=contig,
804+
sizing_mode=sizing_mode,
805+
width=width,
806+
height=genes_height,
807+
x_range=figs[0].x_range,
808+
show=False,
809+
output_backend=output_backend,
810+
)
811+
812+
figs.append(fig2)
813+
814+
# Combine plots into a single figure.
815+
fig = bokeh.layouts.gridplot(
816+
figs,
817+
ncols=1,
818+
toolbar_location="above",
819+
merge_tools=True,
820+
sizing_mode=sizing_mode,
821+
toolbar_options=dict(active_inspect=None),
822+
)
823+
824+
if show: # pragma: no cover
825+
bokeh.plotting.show(fig)
826+
return None
827+
else:
828+
return fig
829+
529830

530831
def haplotype_frequencies(h):
531832
"""Compute haplotype frequencies, returning a dictionary that maps

0 commit comments

Comments
 (0)