Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
23105a4
Replace contig with region in H12 GWSS functions and tests
leehart Dec 6, 2024
acdba78
Replace contig with region in G123 GWSS functions and tests
leehart Dec 6, 2024
226c471
Replace contig with region in iHS GWSS functions and tests
leehart Dec 6, 2024
1409756
Change iHS GWSS cache names
leehart Dec 6, 2024
3825a92
Merge branch 'master' into GH375_allow_region_instead_of_contig_param
leehart Dec 6, 2024
65c048f
Replace contig with region in FST GWSS functions and tests
leehart Dec 9, 2024
bf5b2f9
Change cache name for fst_gwss()
leehart Dec 9, 2024
ab45382
Replace contig with region in H1X GWSS functions and tests. Change ca…
leehart Dec 9, 2024
95fb659
Replace contig with region in XP-EHH GWSS functions and tests. Change…
leehart Dec 9, 2024
d7a7698
WIP: use random_region_str() for random region in GWSS function tests
leehart Dec 13, 2024
9dc4e76
Merge branch 'master' into GH375_allow_region_instead_of_contig_param
leehart Jan 17, 2025
8dfa5f8
WIP: use random contig for GWSS function tests
leehart Jan 17, 2025
e668189
Use random_region_str() instead of random contig for test_fst_gwss()
leehart Jan 17, 2025
b1e0e24
Use random_region_str() instead of random contig for test_g123_gwss_w…
leehart Jan 17, 2025
97c63f3
Replace random contig with random region of fixed size in gwss funct…
leehart Jan 23, 2025
288d40c
Merge branch 'master' into GH375_allow_region_instead_of_contig_param
leehart Jan 31, 2025
aa41bbd
Add region_size for random_region_str in test_fst_gwss()
leehart Feb 3, 2025
2221fd7
Increase random region size to 10_000 for test_g123_gwss_with_default…
leehart Feb 3, 2025
69eec31
Support deprecated contig param in fst_gwss()
leehart Feb 3, 2025
eb463e9
Raise ValueError for missing required alternative args in fst_gwss()
leehart Feb 3, 2025
88a87cb
Fix logic bug in fst_gwss() re missing alt args
leehart Feb 3, 2025
8a0aaba
Copy locals() in fst_gwss()
leehart Feb 4, 2025
c5c5850
Merge branch 'master' into GH375_allow_region_instead_of_contig_param
leehart Jun 17, 2025
51249f1
Add _resolve_region_with_deprec_contig_param. Use in fst_gwss.
leehart Jul 3, 2025
d9c34e7
Merge branch 'master' into GH375_allow_region_instead_of_contig_param
leehart Jul 7, 2025
e014a5c
Re-remove _make_sample_period_... funcs from util.py, defined in frq_…
leehart Jul 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions malariagen_data/af1.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
GCS_REGION_URLS = {
"us-central1": "gs://vo_afun_release_master_us_central1",
}
XPEHH_GWSS_CACHE_NAME = "af1_xpehh_gwss_v1"
IHS_GWSS_CACHE_NAME = "af1_ihs_gwss_v1"
XPEHH_GWSS_CACHE_NAME = "af1_xpehh_gwss_v2"
IHS_GWSS_CACHE_NAME = "af1_ihs_gwss_v2"

TAXON_PALETTE = px.colors.qualitative.Plotly
TAXON_COLORS = {
Expand Down
4 changes: 2 additions & 2 deletions malariagen_data/ag3.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
GCS_REGION_URLS = {
"us-central1": "gs://vo_agam_release_master_us_central1",
}
XPEHH_GWSS_CACHE_NAME = "ag3_xpehh_gwss_v1"
IHS_GWSS_CACHE_NAME = "ag3_ihs_gwss_v1"
XPEHH_GWSS_CACHE_NAME = "ag3_xpehh_gwss_v2"
IHS_GWSS_CACHE_NAME = "ag3_ihs_gwss_v2"
VIRTUAL_CONTIGS = {
"2RL": ("2R", "2L"),
"3RL": ("3R", "3L"),
Expand Down
58 changes: 41 additions & 17 deletions malariagen_data/anoph/fst.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from .snp_data import AnophelesSnpData
from . import base_params, fst_params, gplt_params, plotly_params
from ..util import CacheMiss, check_types
from ..util import CacheMiss, check_types, _resolve_region_with_deprec_contig_param


class AnophelesFstAnalysis(
Expand All @@ -29,7 +29,7 @@ def __init__(
def _fst_gwss(
self,
*,
contig,
region,
window_size,
sample_sets,
cohort1_query,
Expand All @@ -46,7 +46,7 @@ def _fst_gwss(
):
# Compute allele counts.
ac1 = self.snp_allele_counts(
region=contig,
region=region,
sample_query=cohort1_query,
sample_query_options=sample_query_options,
sample_sets=sample_sets,
Expand All @@ -59,7 +59,7 @@ def _fst_gwss(
chunks=chunks,
)
ac2 = self.snp_allele_counts(
region=contig,
region=region,
sample_query=cohort2_query,
sample_query_options=sample_query_options,
sample_sets=sample_sets,
Expand All @@ -74,7 +74,7 @@ def _fst_gwss(

with self._spinner(desc="Load SNP positions"):
pos = self.snp_sites(
region=contig,
region=region,
field="POS",
site_mask=site_mask,
inline_array=inline_array,
Expand Down Expand Up @@ -105,10 +105,10 @@ def _fst_gwss(
)
def fst_gwss(
self,
contig: base_params.contig,
window_size: fst_params.window_size,
cohort1_query: base_params.sample_query,
cohort2_query: base_params.sample_query,
region: Optional[base_params.region] = None,
window_size: Optional[fst_params.window_size] = None,
cohort1_query: Optional[base_params.sample_query] = None,
cohort2_query: Optional[base_params.sample_query] = None,
sample_query_options: Optional[base_params.sample_query_options] = None,
sample_sets: Optional[base_params.sample_sets] = None,
site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
Expand All @@ -123,13 +123,37 @@ def fst_gwss(
inline_array: base_params.inline_array = base_params.inline_array_default,
chunks: base_params.chunks = base_params.native_chunks,
clip_min: fst_params.clip_min = 0.0,
contig: Optional[base_params.region] = None, # Deprecated
) -> Tuple[np.ndarray, np.ndarray]:
# Change this name if you ever change the behaviour of this function, to
# invalidate any previously cached data.
name = "fst_gwss_v2"
name = "fst_gwss_v3"

# Get a copy of the local variables, which will include all provided function parameters.
local_vars = locals().copy()

# Specify which quasi-positional args are required.
# Note: to avoid this, we should move towards a keyword-only version of this function.
required_args = ("window_size", "cohort1_query", "cohort2_query")

# Raise an error for any missing required args.
missing_args = []
for required_arg in required_args:
if local_vars.get(required_arg) is None:
missing_args.append(required_arg)
if missing_args:
raise ValueError(f"Missing required arguments: {missing_args}")

resolved_region = _resolve_region_with_deprec_contig_param(
region=region, contig=contig
)

# Delete original parameters to prevent accidental use.
del region
del contig

params = dict(
contig=contig,
region=resolved_region,
window_size=window_size,
cohort1_query=cohort1_query,
cohort2_query=cohort2_query,
Expand Down Expand Up @@ -164,7 +188,7 @@ def fst_gwss(
)
def plot_fst_gwss_track(
self,
contig: base_params.contig,
region: base_params.region,
window_size: fst_params.window_size,
cohort1_query: base_params.sample_query,
cohort2_query: base_params.sample_query,
Expand All @@ -190,7 +214,7 @@ def plot_fst_gwss_track(
) -> gplt_params.optional_figure:
# compute Fst
x, fst = self.fst_gwss(
contig=contig,
region=region,
window_size=window_size,
cohort_size=cohort_size,
min_cohort_size=min_cohort_size,
Expand Down Expand Up @@ -253,7 +277,7 @@ def plot_fst_gwss_track(
# tidy up the plot
fig.yaxis.axis_label = "Fst"
fig.yaxis.ticker = [0, 1]
self._bokeh_style_genome_xaxis(fig, contig)
self._bokeh_style_genome_xaxis(fig, region)

if show: # pragma: no cover
bokeh.plotting.show(fig)
Expand All @@ -270,7 +294,7 @@ def plot_fst_gwss_track(
)
def plot_fst_gwss(
self,
contig: base_params.contig,
region: base_params.region,
window_size: fst_params.window_size,
cohort1_query: base_params.sample_query,
cohort2_query: base_params.sample_query,
Expand Down Expand Up @@ -298,7 +322,7 @@ def plot_fst_gwss(
) -> gplt_params.optional_figure:
# gwss track
fig1 = self.plot_fst_gwss_track(
contig=contig,
region=region,
window_size=window_size,
cohort1_query=cohort1_query,
cohort2_query=cohort2_query,
Expand All @@ -322,7 +346,7 @@ def plot_fst_gwss(

# plot genes
fig2 = self.plot_genes(
region=contig,
region=region,
sizing_mode=sizing_mode,
width=width,
height=genes_height,
Expand Down
40 changes: 20 additions & 20 deletions malariagen_data/anoph/g123.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def __init__(
def _load_data_for_g123(
self,
*,
contig,
region,
sites,
site_mask,
sample_sets,
Expand All @@ -50,7 +50,7 @@ def _load_data_for_g123(
chunks,
):
ds_snps = self.snp_calls(
region=contig,
region=region,
sample_query=sample_query,
sample_query_options=sample_query_options,
sample_sets=sample_sets,
Expand All @@ -74,7 +74,7 @@ def _load_data_for_g123(
# of samples was used to set up the phasing analysis.
with self._spinner("Subsetting to selected sites"):
haplotype_pos = self.haplotype_sites(
region=contig,
region=region,
analysis=sites,
field="POS",
inline_array=True,
Expand Down Expand Up @@ -106,7 +106,7 @@ def _load_data_for_g123(
def _g123_gwss(
self,
*,
contig,
region,
sites,
site_mask,
window_size,
Expand All @@ -120,7 +120,7 @@ def _g123_gwss(
chunks,
):
gt, pos = self._load_data_for_g123(
contig=contig,
region=region,
sites=sites,
site_mask=site_mask,
sample_sets=sample_sets,
Expand Down Expand Up @@ -151,7 +151,7 @@ def _g123_gwss(
)
def g123_gwss(
self,
contig: base_params.contig,
region: base_params.region,
window_size: g123_params.window_size,
sites: g123_params.sites = base_params.DEFAULT,
site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
Expand All @@ -170,7 +170,7 @@ def g123_gwss(
) -> Tuple[np.ndarray, np.ndarray]:
# Change this name if you ever change the behaviour of this function, to
# invalidate any previously cached data.
name = "g123_gwss_v1"
name = "g123_gwss_v2"

if sites == base_params.DEFAULT:
assert self._default_phasing_analysis is not None
Expand All @@ -182,7 +182,7 @@ def g123_gwss(
)

params = dict(
contig=contig,
region=region,
sites=sites,
site_mask=site_mask,
window_size=window_size,
Expand Down Expand Up @@ -214,7 +214,7 @@ def g123_gwss(
def _g123_calibration(
self,
*,
contig,
region,
sites,
site_mask,
sample_query,
Expand All @@ -228,7 +228,7 @@ def _g123_calibration(
chunks,
) -> Mapping[str, np.ndarray]:
gt, _ = self._load_data_for_g123(
contig=contig,
region=region,
sites=sites,
site_mask=site_mask,
sample_query=sample_query,
Expand Down Expand Up @@ -258,7 +258,7 @@ def _g123_calibration(
)
def g123_calibration(
self,
contig: base_params.contig,
region: base_params.region,
sites: g123_params.sites = base_params.DEFAULT,
site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
sample_query: Optional[base_params.sample_query] = None,
Expand All @@ -280,7 +280,7 @@ def g123_calibration(
name = "g123_calibration_v1"

params = dict(
contig=contig,
region=region,
sites=sites,
site_mask=self._prep_optional_site_mask_param(site_mask=site_mask),
window_sizes=window_sizes,
Expand Down Expand Up @@ -312,7 +312,7 @@ def g123_calibration(
)
def plot_g123_gwss_track(
self,
contig: base_params.contig,
region: base_params.region,
window_size: g123_params.window_size,
sites: g123_params.sites = base_params.DEFAULT,
site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
Expand All @@ -338,7 +338,7 @@ def plot_g123_gwss_track(
) -> gplt_params.optional_figure:
# compute G123
x, g123 = self.g123_gwss(
contig=contig,
region=region,
sites=sites,
site_mask=site_mask,
window_size=window_size,
Expand Down Expand Up @@ -401,7 +401,7 @@ def plot_g123_gwss_track(
# tidy up the plot
fig.yaxis.axis_label = "G123"
fig.yaxis.ticker = [0, 1]
self._bokeh_style_genome_xaxis(fig, contig)
self._bokeh_style_genome_xaxis(fig, region)

if show: # pragma: no cover
bokeh.plotting.show(fig)
Expand All @@ -415,7 +415,7 @@ def plot_g123_gwss_track(
)
def plot_g123_gwss(
self,
contig: base_params.contig,
region: base_params.region,
window_size: g123_params.window_size,
sites: g123_params.sites = base_params.DEFAULT,
site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
Expand Down Expand Up @@ -443,7 +443,7 @@ def plot_g123_gwss(
) -> gplt_params.optional_figure:
# gwss track
fig1 = self.plot_g123_gwss_track(
contig=contig,
region=region,
sites=sites,
site_mask=site_mask,
window_size=window_size,
Expand All @@ -467,7 +467,7 @@ def plot_g123_gwss(

# plot genes
fig2 = self.plot_genes(
region=contig,
region=region,
sizing_mode=sizing_mode,
width=width,
height=genes_height,
Expand Down Expand Up @@ -500,7 +500,7 @@ def plot_g123_gwss(
)
def plot_g123_calibration(
self,
contig: base_params.contig,
region: base_params.region,
sites: g123_params.sites,
site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
sample_query: Optional[base_params.sample_query] = None,
Expand All @@ -521,7 +521,7 @@ def plot_g123_calibration(
) -> gplt_params.optional_figure:
# get g123 values
calibration_runs = self.g123_calibration(
contig=contig,
region=region,
sites=sites,
site_mask=site_mask,
sample_query=sample_query,
Expand Down
Loading
Loading