diff --git a/admixture_tests.py b/admixture_tests.py
new file mode 100644
index 000000000..41398bdd1
--- /dev/null
+++ b/admixture_tests.py
@@ -0,0 +1,107 @@
+import random
+import pytest
+from pytest_cases import parametrize_with_cases
+import numpy as np
+
+from malariagen_data import af1 as _af1
+from malariagen_data import ag3 as _ag3
+from malariagen_data.anoph.admixture_stats import AnophelesAdmixtureAnalysis
+
+
+@pytest.fixture
+def ag3_sim_api(ag3_sim_fixture):
+    return AnophelesAdmixtureAnalysis(
+        url=ag3_sim_fixture.url,
+        config_path=_ag3.CONFIG_PATH,
+        major_version_number=_ag3.MAJOR_VERSION_NUMBER,
+        major_version_path=_ag3.MAJOR_VERSION_PATH,
+        pre=True,
+        aim_metadata_dtype={
+            "aim_species_fraction_arab": "float64",
+            "aim_species_fraction_colu": "float64",
+            "aim_species_fraction_colu_no2l": "float64",
+            "aim_species_gambcolu_arabiensis": object,
+            "aim_species_gambiae_coluzzii": object,
+            "aim_species": object,
+        },
+        gff_gene_type="gene",
+        gff_gene_name_attribute="Name",
+        gff_default_attributes=("ID", "Parent", "Name", "description"),
+        default_site_mask="gamb_colu_arab",
+        results_cache=ag3_sim_fixture.results_cache_path.as_posix(),
+        taxon_colors=_ag3.TAXON_COLORS,
+        virtual_contigs=_ag3.VIRTUAL_CONTIGS,
+    )
+
+
+@pytest.fixture
+def af1_sim_api(af1_sim_fixture):
+    return AnophelesAdmixtureAnalysis(
+        url=af1_sim_fixture.url,
+        config_path=_af1.CONFIG_PATH,
+        major_version_number=_af1.MAJOR_VERSION_NUMBER,
+        major_version_path=_af1.MAJOR_VERSION_PATH,
+        pre=False,
+        gff_gene_type="protein_coding_gene",
+        gff_gene_name_attribute="Note",
+        gff_default_attributes=("ID", "Parent", "Note", "description"),
+        default_site_mask="funestus",
+        results_cache=af1_sim_fixture.results_cache_path.as_posix(),
+        taxon_colors=_af1.TAXON_COLORS,
+    )
+
+
+# N.B., here we use pytest_cases to parametrize tests. Each
+# function whose name begins with "case_" defines a set of
+# inputs to the test functions. See the documentation for
+# pytest_cases for more information, e.g.:
+#
+# https://smarie.github.io/python-pytest-cases/#basic-usage
+#
+# We use this approach here because we want to use fixtures
+# as test parameters, which is otherwise hard to do with
+# pytest alone.
+
+
+def case_ag3_sim(ag3_sim_fixture, ag3_sim_api):
+    return ag3_sim_fixture, ag3_sim_api
+
+
+def case_af1_sim(af1_sim_fixture, af1_sim_api):
+    return af1_sim_fixture, af1_sim_api
+
+
+@parametrize_with_cases("fixture,api", cases=".")
+def test_patterson_f3(fixture, api: AnophelesAdmixtureAnalysis):
+    # Set up test parameters. Sample three distinct taxa at random, so
+    # that the recipient and source cohorts are all different.
+    all_taxon = api.sample_metadata()["taxon"].unique().tolist()
+    taxon = random.sample(all_taxon, 3)
+    recipient_query = f"taxon == {taxon[0]!r}"
+    source1_query = f"taxon == {taxon[1]!r}"
+    source2_query = f"taxon == {taxon[2]!r}"
+    admixture_params = dict(
+        region=random.choice(api.contigs),
+        recipient_query=recipient_query,
+        source1_query=source1_query,
+        source2_query=source2_query,
+        site_mask=random.choice(api.site_mask_ids),
+        segregating_mode=random.choice(["recipient", "all"]),
+        n_jack=random.randint(10, 200),
+        min_cohort_size=1,
+        max_cohort_size=50,
+    )
+
+    # Run the patterson_f3 function under test.
+    f3, se, z = api.patterson_f3(**admixture_params)
+
+    # Check results.
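+    # N.B., these are sanity checks rather than exact expectations. The
+    # lower bound of -0.1 on f3 below is an assumption: f3 can be slightly
+    # negative due to genetic drift and sampling noise.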
+    assert isinstance(f3, np.float64)
+    assert isinstance(se, np.float64)
+    assert isinstance(z, np.float64)
+    assert -0.1 <= f3 <= 1
+    assert 0 <= se <= 1
+    # N.B., we do not assert bounds on z (e.g., -50 <= z <= 50): the taxa
+    # are sampled at random from across all of the data, so the z-score
+    # can be very large.
diff --git a/malariagen_data/anoph/admixture_stats.py b/malariagen_data/anoph/admixture_stats.py
new file mode 100644
index 000000000..b4ae6d295
--- /dev/null
+++ b/malariagen_data/anoph/admixture_stats.py
@@ -0,0 +1,273 @@
+from typing import Tuple, Optional
+
+import allel
+import numpy as np
+import numba
+
+from . import base_params
+from . import admixture_stats_params
+from .snp_data import AnophelesSampleMetadata, AnophelesSnpData
+
+
+@numba.njit
+def _remap_observed(ac_full):
+    """Remap allele indices to remove unobserved alleles."""
+    n_variants = ac_full.shape[0]
+    n_alleles = ac_full.shape[1]
+    # Create the output array - this is an allele mapping array
+    # that specifies how to recode allele indices.
+    mapping = np.empty((n_variants, n_alleles), dtype=np.int32)
+    mapping[:] = -1
+    # Iterate over variants.
+    for i in range(n_variants):
+        # This will be the new index that we are mapping this allele to, if the
+        # allele count is not zero.
+        j_out = 0
+        # Iterate over columns (alleles) in the input array.
+        for j in range(n_alleles):
+            # Access the count for the jth allele.
+            c = ac_full[i, j]
+            if c > 0:
+                # We have found a non-zero allele count, remap the allele.
+                mapping[i, j] = j_out
+                j_out += 1
+    return mapping
+
+
+class AnophelesAdmixtureAnalysis(
+    AnophelesSnpData,
+    AnophelesSampleMetadata,
+):
+    def __init__(
+        self,
+        **kwargs,
+    ):
+        # N.B., this class is designed to work cooperatively, and
+        # so it's important that any remaining parameters are passed
+        # to the superclass constructor.
+        super().__init__(**kwargs)
+
+    def patterson_f3(
+        self,
+        recipient_query: base_params.sample_query,
+        source1_query: base_params.sample_query,
+        source2_query: base_params.sample_query,
+        region: base_params.region,
+        site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
+        n_jack: base_params.n_jack = 200,
+        max_missing_an: base_params.max_missing_an = 0,
+        segregating_mode: admixture_stats_params.segregating_mode = "recipient",
+        cohort_size: Optional[
+            base_params.cohort_size
+        ] = admixture_stats_params.cohort_size_default,
+        min_cohort_size: Optional[
+            base_params.min_cohort_size
+        ] = admixture_stats_params.min_cohort_size_default,
+        max_cohort_size: Optional[
+            base_params.max_cohort_size
+        ] = admixture_stats_params.max_cohort_size_default,
+    ) -> Tuple[float, float, float]:
+        # Purely for conciseness, internally here we use the scikit-allel convention
+        # of labelling the recipient population "C" and the source populations as
+        # "A" and "B".
+
+        # Compute allele counts for the three cohorts.
+        acc = self.snp_allele_counts(
+            sample_query=recipient_query,
+            region=region,
+            site_mask=site_mask,
+            cohort_size=cohort_size,
+            min_cohort_size=min_cohort_size,
+            max_cohort_size=max_cohort_size,
+        )
+        aca = self.snp_allele_counts(
+            sample_query=source1_query,
+            region=region,
+            site_mask=site_mask,
+            cohort_size=cohort_size,
+            min_cohort_size=min_cohort_size,
+            max_cohort_size=max_cohort_size,
+        )
+        acb = self.snp_allele_counts(
+            sample_query=source2_query,
+            region=region,
+            site_mask=site_mask,
+            cohort_size=cohort_size,
+            min_cohort_size=min_cohort_size,
+            max_cohort_size=max_cohort_size,
+        )
+
+        # Locate biallelic variants and deal with missingness.
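+        # N.B., missingness is measured per site as the shortfall in total
+        # allele number (an) relative to the maximum an observed across all
+        # sites; with the default max_missing_an of 0, only sites called in
+        # every sampled chromosome across all three cohorts are retained.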
+        ac = acc + aca + acb
+        loc_bi = allel.AlleleCountsArray(ac).is_biallelic()
+        anc = np.sum(acc, axis=1)
+        ana = np.sum(aca, axis=1)
+        anb = np.sum(acb, axis=1)
+        an = np.sum(ac, axis=1)  # total allele number per site
+        n_chroms = an.max()
+        an_missing = n_chroms - an
+        # In addition to applying the max_missing_an threshold, also make sure
+        # that all three cohorts have nonzero allele counts.
+        loc_nomiss = (an_missing <= max_missing_an) & (anc > 0) & (ana > 0) & (anb > 0)
+        loc_sites = loc_bi & loc_nomiss
+        acc_bi = acc[loc_sites]
+        aca_bi = aca[loc_sites]
+        acb_bi = acb[loc_sites]
+        ac_bi = ac[loc_sites]
+
+        # Squeeze the allele counts so we end up with only two columns.
+        sqz_mapping = _remap_observed(ac_bi)
+        acc_sqz = allel.AlleleCountsArray(acc_bi).map_alleles(sqz_mapping, max_allele=1)
+        aca_sqz = allel.AlleleCountsArray(aca_bi).map_alleles(sqz_mapping, max_allele=1)
+        acb_sqz = allel.AlleleCountsArray(acb_bi).map_alleles(sqz_mapping, max_allele=1)
+
+        # Keep only segregating variants.
+        if segregating_mode == "recipient":
+            loc_seg = acc_sqz.is_segregating()
+        elif segregating_mode == "all":
+            loc_seg = (
+                acc_sqz.is_segregating()
+                & aca_sqz.is_segregating()
+                & acb_sqz.is_segregating()
+            )
+        else:
+            raise ValueError("Invalid value for 'segregating_mode' parameter.")
+        if loc_seg.sum() == 0:
+            raise ValueError("No segregating variants found.")
+        print(f"Using {loc_seg.sum()} segregating variants for f3 calculation.")
+        acc_seg = acc_sqz[loc_seg]
+        aca_seg = aca_sqz[loc_seg]
+        acb_seg = acb_sqz[loc_seg]
+
+        # Compute the f3 statistic, with block jackknife estimates of the
+        # standard error and z-score. Guard against blen == 0 when there are
+        # fewer segregating variants than jackknife blocks.
+        blen = max(acc_seg.shape[0] // n_jack, 1)
+        f3, se, z, _, _ = allel.average_patterson_f3(
+            acc_seg,
+            aca_seg,
+            acb_seg,
+            blen=blen,
+            normed=True,
+        )
+
+        return f3, se, z
+
+    def patterson_f4(
+        self,
+        a_query: base_params.sample_query,
+        b_query: base_params.sample_query,
+        c_query: base_params.sample_query,
+        d_query: base_params.sample_query,
+        region: base_params.region,
+        site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
+        n_jack: base_params.n_jack = 200,
+        max_missing_an: base_params.max_missing_an = 0,
+        segregating_mode: admixture_stats_params.segregating_mode = "recipient",
+        cohort_size: Optional[
+            base_params.cohort_size
+        ] = admixture_stats_params.cohort_size_default,
+        min_cohort_size: Optional[
+            base_params.min_cohort_size
+        ] = admixture_stats_params.min_cohort_size_default,
+        max_cohort_size: Optional[
+            base_params.max_cohort_size
+        ] = admixture_stats_params.max_cohort_size_default,
+    ) -> Tuple[float, float, float]:
+        # Purely for conciseness, internally here we use the scikit-allel
+        # convention of labelling the four populations "A", "B", "C" and "D".
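+        # N.B., scikit-allel's average_patterson_d computes D for the
+        # configuration (A, B; C, D), so the sign of the returned statistic
+        # depends on how the cohorts are assigned to the a/b/c/d queries.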
+
+        # Compute allele counts for the four cohorts.
+        aca = self.snp_allele_counts(
+            sample_query=a_query,
+            region=region,
+            site_mask=site_mask,
+            cohort_size=cohort_size,
+            min_cohort_size=min_cohort_size,
+            max_cohort_size=max_cohort_size,
+        )
+
+        acb = self.snp_allele_counts(
+            sample_query=b_query,
+            region=region,
+            site_mask=site_mask,
+            cohort_size=cohort_size,
+            min_cohort_size=min_cohort_size,
+            max_cohort_size=max_cohort_size,
+        )
+
+        acc = self.snp_allele_counts(
+            sample_query=c_query,
+            region=region,
+            site_mask=site_mask,
+            cohort_size=cohort_size,
+            min_cohort_size=min_cohort_size,
+            max_cohort_size=max_cohort_size,
+        )
+
+        acd = self.snp_allele_counts(
+            sample_query=d_query,
+            region=region,
+            site_mask=site_mask,
+            cohort_size=cohort_size,
+            min_cohort_size=min_cohort_size,
+            max_cohort_size=max_cohort_size,
+        )
+
+        # Locate biallelic variants and deal with missingness.
+        ac = aca + acb + acc + acd
+        loc_bi = allel.AlleleCountsArray(ac).is_biallelic()
+        ana = np.sum(aca, axis=1)
+        anb = np.sum(acb, axis=1)
+        anc = np.sum(acc, axis=1)
+        and_ = np.sum(acd, axis=1)  # trailing underscore as "and" is reserved
+        an = np.sum(ac, axis=1)  # total allele number per site
+        n_chroms = an.max()
+        an_missing = n_chroms - an
+        # In addition to applying the max_missing_an threshold, also make sure
+        # that all four cohorts have nonzero allele counts.
+        loc_nomiss = (
+            (an_missing <= max_missing_an)
+            & (ana > 0)
+            & (anb > 0)
+            & (anc > 0)
+            & (and_ > 0)
+        )
+        loc_sites = loc_bi & loc_nomiss
+        aca_bi = aca[loc_sites]
+        acb_bi = acb[loc_sites]
+        acc_bi = acc[loc_sites]
+        acd_bi = acd[loc_sites]
+        ac_bi = ac[loc_sites]
+
+        # Squeeze the allele counts so we end up with only two columns.
+        sqz_mapping = _remap_observed(ac_bi)
+        aca_sqz = allel.AlleleCountsArray(aca_bi).map_alleles(sqz_mapping, max_allele=1)
+        acb_sqz = allel.AlleleCountsArray(acb_bi).map_alleles(sqz_mapping, max_allele=1)
+        acc_sqz = allel.AlleleCountsArray(acc_bi).map_alleles(sqz_mapping, max_allele=1)
+        acd_sqz = allel.AlleleCountsArray(acd_bi).map_alleles(sqz_mapping, max_allele=1)
+
+        # Keep only segregating variants. N.B., there is no recipient
+        # population in an f4 configuration; here "recipient" mode uses
+        # sites segregating in population C, mirroring patterson_f3.
+        if segregating_mode == "recipient":
+            loc_seg = acc_sqz.is_segregating()
+        elif segregating_mode == "all":
+            loc_seg = (
+                aca_sqz.is_segregating()
+                & acb_sqz.is_segregating()
+                & acc_sqz.is_segregating()
+                & acd_sqz.is_segregating()
+            )
+        else:
+            raise ValueError("Invalid value for 'segregating_mode' parameter.")
+        if loc_seg.sum() == 0:
+            raise ValueError("No segregating variants found.")
+        print(f"Using {loc_seg.sum()} segregating variants for f4 calculation.")
+        aca_seg = aca_sqz[loc_seg]
+        acb_seg = acb_sqz[loc_seg]
+        acc_seg = acc_sqz[loc_seg]
+        acd_seg = acd_sqz[loc_seg]
+
+        # Compute the f4 (D) statistic, with block jackknife estimates of
+        # the standard error and z-score. Guard against blen == 0 when there
+        # are fewer segregating variants than jackknife blocks.
+        blen = max(acc_seg.shape[0] // n_jack, 1)
+        f4, se, z, _, _ = allel.average_patterson_d(
+            aca_seg,
+            acb_seg,
+            acc_seg,
+            acd_seg,
+            blen=blen,
+        )
+
+        return f4, se, z
diff --git a/malariagen_data/anoph/admixture_stats_params.py b/malariagen_data/anoph/admixture_stats_params.py
new file mode 100644
index 000000000..febd241ba
--- /dev/null
+++ b/malariagen_data/anoph/admixture_stats_params.py
@@ -0,0 +1,18 @@
+"""Parameter definitions for admixture functions."""
+
+from typing import Optional
+import typing_extensions
+
+from . import base_params
+
+cohort_size_default: Optional[base_params.cohort_size] = None
+min_cohort_size_default: base_params.min_cohort_size = 10
+max_cohort_size_default: base_params.max_cohort_size = 50
+
+segregating_mode: typing_extensions.TypeAlias = typing_extensions.Annotated[
+    str,
+    """
+    Define the way segregating variants are chosen.
+    Available options are "recipient", to use only sites segregating
+    within the recipient population cohort, or "all", to use only sites
+    segregating within every population cohort.
+    """,
+]
diff --git a/malariagen_data/anopheles.py b/malariagen_data/anopheles.py
index 0aec9bd9a..04d49a25d 100644
--- a/malariagen_data/anopheles.py
+++ b/malariagen_data/anopheles.py
@@ -39,6 +39,7 @@
 from .anoph.to_plink import PlinkConverter
 from .anoph.g123 import AnophelesG123Analysis
 from .anoph.fst import AnophelesFstAnalysis
+from .anoph.admixture_stats import AnophelesAdmixtureAnalysis
 from .anoph.h12 import AnophelesH12Analysis
 from .anoph.h1x import AnophelesH1XAnalysis
 from .mjn import median_joining_network, mjn_graph
@@ -82,6 +83,7 @@ class AnophelesDataResource(
     AnophelesH12Analysis,
     AnophelesG123Analysis,
     AnophelesFstAnalysis,
+    AnophelesAdmixtureAnalysis,
     AnophelesHapFrequencyAnalysis,
     AnophelesDistanceAnalysis,
     AnophelesPca,
diff --git a/notebooks/admixture_stats.ipynb b/notebooks/admixture_stats.ipynb
new file mode 100644
index 000000000..f02c27448
--- /dev/null
+++ b/notebooks/admixture_stats.ipynb
@@ -0,0 +1,359 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import malariagen_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/javascript": "<BokehJS loader script omitted for brevity>",
+      "application/vnd.bokehjs_load.v0+json": ""
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<HTML summary table for the Ag3 API client omitted for brevity; the same information is in the text/plain repr below.>"
+      ],
+      "text/plain": [
+       "<MalariaGEN Ag3 API client>\n",
+       "Storage URL             : simplecache::gs://vo_agam_release_master_us_central1\n",
+       "Data releases available : 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 3.10, 3.11, 3.12, 3.13, 3.14\n",
+       "Results cache           : None\n",
+       "Cohorts analysis        : 20250131\n",
+       "AIM analysis            : 20220528\n",
+       "Site filters analysis   : dt_20200416\n",
+       "Software version        : malariagen_data 0.0.0\n",
+       "Client location         : England, United Kingdom\n",
+       "---\n",
+       "Please note that data are subject to terms of use,\n",
+       "for more information see https://www.malariagen.net/data\n",
+       "or contact support@malariagen.net. For API documentation see\n",
+       "https://malariagen.github.io/malariagen-data-python/v0.0.0/Ag3.html"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "ag3 = malariagen_data.Ag3(\n",
+    "    \"simplecache::gs://vo_agam_release_master_us_central1\",\n",
+    "    simplecache=dict(cache_storage=\"../gcs_cache\"),\n",
+    ")\n",
+    "ag3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      " \r"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "17b1973b51ec4315a7fa5caa8514e0c1",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Compute SNP allele counts:   0%|          | 0/48 [00:00