Skip to content

Commit 450235b

Browse files
authored
Merge branch 'master' into 600-adding-option-for-multiple-transcripts-to-diplotype_clustering
2 parents 7104cdb + 6ce9f11 commit 450235b

22 files changed

+1498
-863
lines changed

.github/workflows/label_issues.yml

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,18 @@
1+
name: Label issues
2+
on:
3+
issues:
4+
types:
5+
- reopened
6+
- opened
7+
jobs:
8+
label_issues:
9+
runs-on: ubuntu-latest
10+
permissions:
11+
issues: write
12+
steps:
13+
- run: gh issue edit "$NUMBER" --add-label "$LABELS"
14+
env:
15+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
16+
GH_REPO: ${{ github.repository }}
17+
NUMBER: ${{ github.event.issue.number }}
18+
LABELS: triage

malariagen_data/af1.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -123,6 +123,7 @@ def __init__(
123123
taxon_colors=TAXON_COLORS,
124124
virtual_contigs=None,
125125
gene_names=None,
126+
inversion_tag_path=None,
126127
)
127128

128129
def __repr__(self):

malariagen_data/ag3.py

Lines changed: 2 additions & 92 deletions
Original file line number | Diff line number | Diff line change
@@ -2,18 +2,11 @@
22

33
import dask
44
import pandas as pd # type: ignore
5-
from pandas import CategoricalDtype
6-
import numpy as np # type: ignore
7-
import allel # type: ignore
85
import plotly.express as px # type: ignore
96

107
import malariagen_data
118
from .anopheles import AnophelesDataResource
129

13-
from numpydoc_decorator import doc
14-
from .util import check_types, _karyotype_tags_n_alt
15-
from .anoph import base_params
16-
from typing import Optional, Literal, Annotated, TypeAlias
1710

1811
# silence dask performance warnings
1912
dask.config.set(**{"array.slicing.split_native_chunks": False}) # type: ignore
@@ -35,6 +28,7 @@
3528
GENE_NAMES = {
3629
"AGAP004707": "Vgsc/para",
3730
}
31+
INVERSION_TAG_PATH = "karyotype_tag_snps.csv"
3832

3933

4034
def _setup_aim_palettes():
@@ -83,12 +77,6 @@ def _setup_aim_palettes():
8377
}
8478

8579

86-
inversion_param: TypeAlias = Annotated[
87-
Literal["2La", "2Rb", "2Rc_gam", "2Rc_col", "2Rd", "2Rj"],
88-
"Name of inversion to infer karyotype for.",
89-
]
90-
91-
9280
class Ag3(AnophelesDataResource):
9381
"""Provides access to data from Ag3.x releases.
9482
@@ -203,6 +191,7 @@ def __init__(
203191
taxon_colors=TAXON_COLORS,
204192
virtual_contigs=VIRTUAL_CONTIGS,
205193
gene_names=GENE_NAMES,
194+
inversion_tag_path=INVERSION_TAG_PATH,
206195
)
207196

208197
# set up caches
@@ -355,82 +344,3 @@ def _results_cache_add_analysis_params(self, params):
355344
super()._results_cache_add_analysis_params(params)
356345
# override parent class to add AIM analysis
357346
params["aim_analysis"] = self._aim_analysis
358-
359-
@check_types
360-
@doc(
361-
summary="Load tag SNPs for a given inversion in Ag.",
362-
)
363-
def load_inversion_tags(self, inversion: inversion_param) -> pd.DataFrame:
364-
# needs to be modified depending on where we are hosting
365-
import importlib.resources
366-
from . import resources
367-
368-
with importlib.resources.path(resources, "karyotype_tag_snps.csv") as path:
369-
df_tag_snps = pd.read_csv(path, sep=",")
370-
return df_tag_snps.query(f"inversion == '{inversion}'").reset_index()
371-
372-
@check_types
373-
@doc(
374-
summary="Infer karyotype from tag SNPs for a given inversion in Ag.",
375-
)
376-
def karyotype(
377-
self,
378-
inversion: inversion_param,
379-
sample_sets: Optional[base_params.sample_sets] = None,
380-
sample_query: Optional[base_params.sample_query] = None,
381-
sample_query_options: Optional[base_params.sample_query_options] = None,
382-
) -> pd.DataFrame:
383-
# load tag snp data
384-
df_tagsnps = self.load_inversion_tags(inversion=inversion)
385-
inversion_pos = df_tagsnps["position"]
386-
inversion_alts = df_tagsnps["alt_allele"]
387-
contig = inversion[0:2]
388-
389-
# get snp calls for inversion region
390-
start, end = np.min(inversion_pos), np.max(inversion_pos)
391-
region = f"{contig}:{start}-{end}"
392-
393-
ds_snps = self.snp_calls(
394-
region=region,
395-
sample_sets=sample_sets,
396-
sample_query=sample_query,
397-
sample_query_options=sample_query_options,
398-
)
399-
400-
with self._spinner("Inferring karyotype from tag SNPs"):
401-
# access variables we need
402-
geno = allel.GenotypeDaskArray(ds_snps["call_genotype"].data)
403-
pos = allel.SortedIndex(ds_snps["variant_position"].values)
404-
samples = ds_snps["sample_id"].values
405-
alts = ds_snps["variant_allele"].values.astype(str)
406-
407-
# subset to position of inversion tags
408-
mask = pos.locate_intersection(inversion_pos)[0]
409-
alts = alts[mask]
410-
geno = geno.compress(mask, axis=0).compute()
411-
412-
# infer karyotype
413-
gn_alt = _karyotype_tags_n_alt(
414-
gt=geno, alts=alts, inversion_alts=inversion_alts
415-
)
416-
is_called = geno.is_called()
417-
418-
# calculate mean genotype for each sample whilst masking missing calls
419-
av_gts = np.mean(np.ma.MaskedArray(gn_alt, mask=~is_called), axis=0)
420-
total_sites = np.sum(is_called, axis=0)
421-
422-
df = pd.DataFrame(
423-
{
424-
"sample_id": samples,
425-
"inversion": inversion,
426-
f"karyotype_{inversion}_mean": av_gts,
427-
# round the genotypes then convert to int
428-
f"karyotype_{inversion}": av_gts.round().astype(int),
429-
"total_tag_snps": total_sites,
430-
},
431-
)
432-
# Allow filling missing values with "<NA>" visible placeholder.
433-
kt_dtype = CategoricalDtype(categories=[0, 1, 2, "<NA>"], ordered=True)
434-
df[f"karyotype_{inversion}"] = df[f"karyotype_{inversion}"].astype(kt_dtype)
435-
436-
return df

malariagen_data/anoph/cnv_frq.py

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -10,24 +10,25 @@
1010
from numpydoc_decorator import doc # type: ignore
1111

1212
from . import base_params, cnv_params, frq_params
13+
from .frq_base import (
14+
prep_samples_for_cohort_grouping,
15+
build_cohorts_from_sample_grouping,
16+
add_frequency_ci,
17+
)
1318
from ..util import (
1419
check_types,
1520
pandas_apply,
1621
Region,
1722
parse_multi_region,
1823
region_str,
1924
simple_xarray_concat,
20-
prep_samples_for_cohort_grouping,
21-
build_cohorts_from_sample_grouping,
22-
add_frequency_ci,
2325
)
2426
from .cnv_data import AnophelesCnvData
27+
from .frq_base import AnophelesFrequencyAnalysis
2528
from .sample_metadata import locate_cohorts
2629

2730

28-
class AnophelesCnvFrequencyAnalysis(
29-
AnophelesCnvData,
30-
):
31+
class AnophelesCnvFrequencyAnalysis(AnophelesCnvData, AnophelesFrequencyAnalysis):
3132
def __init__(
3233
self,
3334
**kwargs,

malariagen_data/anoph/distance.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -421,7 +421,7 @@ def plot_njt(
421421
height: plotly_params.fig_height = 600,
422422
show: plotly_params.show = True,
423423
renderer: plotly_params.renderer = None,
424-
render_mode: plotly_params.render_mode = "auto",
424+
render_mode: plotly_params.render_mode = "svg",
425425
title: plotly_params.title = True,
426426
title_font_size: plotly_params.title_font_size = 14,
427427
line_width: plotly_params.line_width = 0.5,

0 commit comments

Comments
 (0)