Skip to content

Commit 6c27d20

Browse files
authored
Merge pull request #653 from malariagen/GH407_allow_gene_labels
Add gene_labels and gene_labelset to plot_genes()
2 parents 6cd964b + db100fc commit 6c27d20

File tree

11 files changed

+243
-2
lines changed

11 files changed

+243
-2
lines changed

malariagen_data/anoph/cnv_data.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -751,6 +751,8 @@ def plot_cnv_hmm_coverage(
751751
line_kwargs: Optional[gplt_params.line_kwargs] = None,
752752
show: gplt_params.show = True,
753753
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
754+
gene_labels: Optional[gplt_params.gene_labels] = None,
755+
gene_labelset: Optional[gplt_params.gene_labelset] = None,
754756
) -> gplt_params.figure:
755757
debug = self._log.debug
756758

@@ -782,6 +784,8 @@ def plot_cnv_hmm_coverage(
782784
x_range=fig1.x_range,
783785
show=False,
784786
output_backend=output_backend,
787+
gene_labels=gene_labels,
788+
gene_labelset=gene_labelset,
785789
)
786790

787791
debug("combine plots into a single figure")
@@ -960,6 +964,8 @@ def plot_cnv_hmm_heatmap(
960964
track_height: Optional[gplt_params.track_height] = None,
961965
genes_height: gplt_params.genes_height = gplt_params.genes_height_default,
962966
show: gplt_params.show = True,
967+
gene_labels: Optional[gplt_params.gene_labels] = None,
968+
gene_labelset: Optional[gplt_params.gene_labelset] = None,
963969
) -> gplt_params.figure:
964970
debug = self._log.debug
965971

@@ -989,6 +995,8 @@ def plot_cnv_hmm_heatmap(
989995
height=genes_height,
990996
x_range=fig1.x_range,
991997
show=False,
998+
gene_labels=gene_labels,
999+
gene_labelset=gene_labelset,
9921000
)
9931001

9941002
debug("combine plots into a single figure")

malariagen_data/anoph/fst.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,8 @@ def plot_fst_gwss(
293293
show: gplt_params.show = True,
294294
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
295295
clip_min: fst_params.clip_min = 0.0,
296+
gene_labels: Optional[gplt_params.gene_labels] = None,
297+
gene_labelset: Optional[gplt_params.gene_labelset] = None,
296298
) -> gplt_params.figure:
297299
# gwss track
298300
fig1 = self.plot_fst_gwss_track(
@@ -327,6 +329,8 @@ def plot_fst_gwss(
327329
x_range=fig1.x_range,
328330
show=False,
329331
output_backend=output_backend,
332+
gene_labels=gene_labels,
333+
gene_labelset=gene_labelset,
330334
)
331335

332336
# combine plots into a single figure

malariagen_data/anoph/g123.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,8 @@ def plot_g123_gwss(
438438
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
439439
inline_array: base_params.inline_array = base_params.inline_array_default,
440440
chunks: base_params.chunks = base_params.native_chunks,
441+
gene_labels: Optional[gplt_params.gene_labels] = None,
442+
gene_labelset: Optional[gplt_params.gene_labelset] = None,
441443
) -> gplt_params.figure:
442444
# gwss track
443445
fig1 = self.plot_g123_gwss_track(
@@ -472,6 +474,8 @@ def plot_g123_gwss(
472474
x_range=fig1.x_range,
473475
show=False,
474476
output_backend=output_backend,
477+
gene_labels=gene_labels,
478+
gene_labelset=gene_labelset,
475479
)
476480

477481
# combine plots into a single figure

malariagen_data/anoph/genome_features.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,8 @@ def plot_genes(
333333
x_range: Optional[gplt_params.x_range] = None,
334334
title: Optional[gplt_params.title] = None,
335335
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
336+
gene_labels: Optional[gplt_params.gene_labels] = None,
337+
gene_labelset: Optional[gplt_params.gene_labelset] = None,
336338
) -> gplt_params.figure:
337339
debug = self._log.debug
338340

@@ -408,6 +410,101 @@ def plot_genes(
408410
line_width=0,
409411
)
410412

413+
if gene_labels:
414+
debug("determine new figure height and range to accommodate gene labels")
415+
416+
# Increase the figure height by a certain factor, to accommodate labels.
417+
height_increase_factor = 1.3
418+
fig.height = int(fig.height * height_increase_factor)
419+
420+
# Get the original y_range.
421+
# Note: fig.y_range is not subscriptable.
422+
orig_y_range = fig.y_range.start, fig.y_range.end
423+
424+
# Determine the midpoint of the original range, to rescale outward from there.
425+
orig_mid_y_range = (orig_y_range[0] + orig_y_range[1]) / 2
426+
orig_y_range_extent = orig_y_range[1] - orig_y_range[0]
427+
428+
# Determine the new start and end points of the extended range.
429+
new_y_range_extent = orig_y_range_extent * height_increase_factor
430+
new_y_range_extent_half = new_y_range_extent / 2
431+
new_y_start = orig_mid_y_range - new_y_range_extent_half
432+
new_y_end = orig_mid_y_range + new_y_range_extent_half
433+
434+
# Set the new y_range.
435+
fig.y_range = bokeh.models.Range1d(new_y_start, new_y_end)
436+
437+
debug("determine midpoint of each gene rectangle")
438+
data["mid_x"] = (data["start"] + data["end"]) / 2
439+
440+
debug("make gene labels and pointers")
441+
442+
# Put gene_labels into a new column, where the gene_id matches.
443+
# Fill unmapped genes with empty strings, otherwise "NaN" would be displayed.
444+
data["gene_label"] = data["ID"].map(gene_labels).fillna("")
445+
446+
# Put gene pointers (▲ or ▼) in a new column, depending on the strand.
447+
# Genes without a label (null or empty gene_label) get an empty pointer, so nothing is shown for them.
448+
data["gene_pointer"] = data.apply(
449+
lambda row: ("▼" if row["strand"] == "+" else "▲")
450+
if row["gene_label"]
451+
else "",
452+
axis=1,
453+
)
454+
455+
# Put the pointer above or below the gene rectangle, depending on + or - strand.
456+
neg_strand_pointer_y = orig_mid_y_range - 1.1
457+
pos_strand_pointer_y = orig_mid_y_range + 1.1
458+
data["pointer_y"] = data["strand"].apply(
459+
lambda strand: pos_strand_pointer_y
460+
if strand == "+"
461+
else neg_strand_pointer_y
462+
)
463+
464+
# Put the label above or below the gene rectangle, depending on + or - strand.
465+
neg_strand_label_y = orig_mid_y_range - 1.25
466+
pos_strand_label_y = orig_mid_y_range + 1.3
467+
data["label_y"] = data["strand"].apply(
468+
lambda strand: pos_strand_label_y
469+
if strand == "+"
470+
else neg_strand_label_y
471+
)
472+
473+
# Get the data as a ColumnDataSource.
474+
data_as_cds = bokeh.models.ColumnDataSource(data)
475+
476+
# Create a LabelSet for the gene pointers.
477+
gene_pointers_ls = bokeh.models.LabelSet(
478+
source=data_as_cds,
479+
x="mid_x",
480+
y="pointer_y",
481+
text="gene_pointer",
482+
text_align="center",
483+
text_baseline="middle",
484+
text_font_size="9pt",
485+
text_color="#444444",
486+
)
487+
488+
# Create a LabelSet for the gene labels.
489+
gene_labels_ls = bokeh.models.LabelSet(
490+
source=data_as_cds,
491+
x="mid_x",
492+
y="label_y",
493+
text="gene_label",
494+
text_align="left",
495+
text_baseline="middle",
496+
text_font_size="9pt",
497+
text_color="#444444",
498+
x_offset=8,
499+
)
500+
501+
# Add the pointers and labels to the figure.
502+
fig.add_layout(gene_pointers_ls)
503+
fig.add_layout(gene_labels_ls)
504+
505+
if gene_labelset:
506+
fig.add_layout(gene_labelset)
507+
411508
debug("tidy up the plot")
412509
fig.ygrid.visible = False
413510
yticks = [0.4, 1.4]

malariagen_data/anoph/gplt_params.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,3 +120,13 @@
120120
contig_colors_default: Final[contig_colors] = list(bokeh.palettes.d3["Category20b"][5])
121121

122122
colors: TypeAlias = Annotated[Sequence[str], "List of colors."]
123+
124+
gene_labels: TypeAlias = Annotated[
125+
Mapping[str, str],
126+
"A mapping of gene identifiers to custom labels, which will appear in the plot.",
127+
]
128+
129+
gene_labelset: TypeAlias = Annotated[
130+
bokeh.models.LabelSet,
131+
"A LabelSet to use in the plot.",
132+
]

malariagen_data/anoph/h12.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,8 @@ def plot_h12_gwss(
473473
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
474474
chunks: base_params.chunks = base_params.native_chunks,
475475
inline_array: base_params.inline_array = base_params.inline_array_default,
476+
gene_labels: Optional[gplt_params.gene_labels] = None,
477+
gene_labelset: Optional[gplt_params.gene_labelset] = None,
476478
) -> gplt_params.figure:
477479
# Plot GWSS track.
478480
fig1 = self.plot_h12_gwss_track(
@@ -508,6 +510,8 @@ def plot_h12_gwss(
508510
x_range=fig1.x_range,
509511
show=False,
510512
output_backend=output_backend,
513+
gene_labels=gene_labels,
514+
gene_labelset=gene_labelset,
511515
)
512516

513517
# Combine plots into a single figure.
@@ -674,6 +678,8 @@ def plot_h12_gwss_multi_overlay(
674678
genes_height: gplt_params.genes_height = gplt_params.genes_height_default,
675679
show: gplt_params.show = True,
676680
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
681+
gene_labels: Optional[gplt_params.gene_labels] = None,
682+
gene_labelset: Optional[gplt_params.gene_labelset] = None,
677683
) -> gplt_params.figure:
678684
# Plot GWSS track.
679685
fig1 = self.plot_h12_gwss_multi_overlay_track(
@@ -710,6 +716,8 @@ def plot_h12_gwss_multi_overlay(
710716
x_range=fig1.x_range,
711717
show=False,
712718
output_backend=output_backend,
719+
gene_labels=gene_labels,
720+
gene_labelset=gene_labelset,
713721
)
714722

715723
# Combine plots into a single figure.
@@ -755,6 +763,8 @@ def plot_h12_gwss_multi_panel(
755763
genes_height: gplt_params.genes_height = gplt_params.genes_height_default,
756764
show: gplt_params.show = True,
757765
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
766+
gene_labels: Optional[gplt_params.gene_labels] = None,
767+
gene_labelset: Optional[gplt_params.gene_labelset] = None,
758768
) -> gplt_params.figure:
759769
cohort_queries = self._setup_cohort_queries(
760770
cohorts=cohorts,
@@ -807,6 +817,8 @@ def plot_h12_gwss_multi_panel(
807817
x_range=figs[0].x_range,
808818
show=False,
809819
output_backend=output_backend,
820+
gene_labels=gene_labels,
821+
gene_labelset=gene_labelset,
810822
)
811823

812824
figs.append(fig2)

malariagen_data/anoph/h1x.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,8 @@ def plot_h1x_gwss(
305305
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
306306
chunks: base_params.chunks = base_params.native_chunks,
307307
inline_array: base_params.inline_array = base_params.inline_array_default,
308+
gene_labels: Optional[gplt_params.gene_labels] = None,
309+
gene_labelset: Optional[gplt_params.gene_labelset] = None,
308310
) -> gplt_params.figure:
309311
# Plot GWSS track.
310312
fig1 = self.plot_h1x_gwss_track(
@@ -341,6 +343,8 @@ def plot_h1x_gwss(
341343
x_range=fig1.x_range,
342344
show=False,
343345
output_backend=output_backend,
346+
gene_labels=gene_labels,
347+
gene_labelset=gene_labelset,
344348
)
345349

346350
# Combine plots into a single figure.

malariagen_data/anoph/snp_data.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1305,6 +1305,8 @@ def plot_snps(
13051305
genes_height: gplt_params.genes_height = gplt_params.genes_height_default,
13061306
max_snps: int = 200_000,
13071307
show: gplt_params.show = True,
1308+
gene_labels: Optional[gplt_params.gene_labels] = None,
1309+
gene_labelset: Optional[gplt_params.gene_labelset] = None,
13081310
) -> gplt_params.figure:
13091311
# Plot SNPs track.
13101312
fig1 = self.plot_snps_track(
@@ -1330,6 +1332,8 @@ def plot_snps(
13301332
height=genes_height,
13311333
x_range=fig1.x_range,
13321334
show=False,
1335+
gene_labels=gene_labels,
1336+
gene_labelset=gene_labelset,
13331337
)
13341338

13351339
# Layout tracks in a grid.

malariagen_data/anopheles.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,8 @@ def plot_heterozygosity(
431431
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
432432
chunks: base_params.chunks = base_params.native_chunks,
433433
inline_array: base_params.inline_array = base_params.inline_array_default,
434+
gene_labels: Optional[gplt_params.gene_labels] = None,
435+
gene_labelset: Optional[gplt_params.gene_labelset] = None,
434436
) -> gplt_params.figure:
435437
debug = self._log.debug
436438

@@ -491,6 +493,8 @@ def plot_heterozygosity(
491493
x_range=fig1.x_range,
492494
show=False,
493495
output_backend=output_backend,
496+
gene_labels=gene_labels,
497+
gene_labelset=gene_labelset,
494498
)
495499
figs.append(fig_genes)
496500

@@ -726,6 +730,8 @@ def plot_roh(
726730
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
727731
chunks: base_params.chunks = base_params.native_chunks,
728732
inline_array: base_params.inline_array = base_params.inline_array_default,
733+
gene_labels: Optional[gplt_params.gene_labels] = None,
734+
gene_labelset: Optional[gplt_params.gene_labelset] = None,
729735
) -> gplt_params.figure:
730736
debug = self._log.debug
731737

@@ -798,6 +804,8 @@ def plot_roh(
798804
x_range=fig_het.x_range,
799805
show=False,
800806
output_backend=output_backend,
807+
gene_labels=gene_labels,
808+
gene_labelset=gene_labelset,
801809
)
802810
figs.append(fig_genes)
803811

@@ -2290,6 +2298,8 @@ def plot_xpehh_gwss(
22902298
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
22912299
chunks: base_params.chunks = base_params.native_chunks,
22922300
inline_array: base_params.inline_array = base_params.inline_array_default,
2301+
gene_labels: Optional[gplt_params.gene_labels] = None,
2302+
gene_labelset: Optional[gplt_params.gene_labelset] = None,
22932303
) -> gplt_params.figure:
22942304
# gwss track
22952305
fig1 = self.plot_xpehh_gwss_track(
@@ -2334,6 +2344,8 @@ def plot_xpehh_gwss(
23342344
x_range=fig1.x_range,
23352345
show=False,
23362346
output_backend=output_backend,
2347+
gene_labels=gene_labels,
2348+
gene_labelset=gene_labelset,
23372349
)
23382350

23392351
# combine plots into a single figure
@@ -2391,6 +2403,8 @@ def plot_ihs_gwss(
23912403
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
23922404
chunks: base_params.chunks = base_params.native_chunks,
23932405
inline_array: base_params.inline_array = base_params.inline_array_default,
2406+
gene_labels: Optional[gplt_params.gene_labels] = None,
2407+
gene_labelset: Optional[gplt_params.gene_labelset] = None,
23942408
) -> gplt_params.figure:
23952409
# gwss track
23962410
fig1 = self.plot_ihs_gwss_track(
@@ -2437,6 +2451,8 @@ def plot_ihs_gwss(
24372451
x_range=fig1.x_range,
24382452
show=False,
24392453
output_backend=output_backend,
2454+
gene_labels=gene_labels,
2455+
gene_labelset=gene_labelset,
24402456
)
24412457

24422458
# combine plots into a single figure

0 commit comments

Comments
 (0)