Skip to content

Commit 188e0a6

Browse files
authored
Merge branch 'master' into GH756-mohamed-laarej-shadow
2 parents 3475ee6 + a39a3c1 commit 188e0a6

File tree

5 files changed

+31
-12
lines changed

5 files changed

+31
-12
lines changed

malariagen_data/anoph/frq_base.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -210,7 +210,7 @@ def plot_frequencies_heatmap(
210210

211211
# Indexing.
212212
if index is None:
213-
index = list(df.index.names)
213+
index = [str(name) for name in df.index.names]
214214
df = df.reset_index().copy()
215215
if isinstance(index, list):
216216
index_col = (

malariagen_data/anoph/sample_metadata.py

Lines changed: 22 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,8 @@
1010
Sequence,
1111
Tuple,
1212
Union,
13+
Hashable,
14+
cast
1315
)
1416

1517
import ipyleaflet # type: ignore
@@ -49,7 +51,7 @@ def __init__(
4951
# data resources, and so column names and dtype need to be
5052
# passed in as parameters.
5153
self._aim_metadata_columns: Optional[List[str]] = None
52-
self._aim_metadata_dtype: Dict[str, Any] = dict()
54+
self._aim_metadata_dtype: Dict[str, Union[str, type, np.dtype]] = dict()
5355
if isinstance(aim_metadata_dtype, Mapping):
5456
self._aim_metadata_columns = list(aim_metadata_dtype.keys())
5557
self._aim_metadata_dtype.update(aim_metadata_dtype)
@@ -150,7 +152,19 @@ def _parse_general_metadata(
150152
"longitude": "float64",
151153
"sex_call": "object",
152154
}
153-
df = pd.read_csv(io.BytesIO(data), dtype=dtype, na_values="")
155+
# Mapping of string dtypes to actual dtypes
156+
dtype_map = {
157+
"object": str,
158+
"int64": np.int64,
159+
"float64": np.float64,
160+
}
161+
162+
# Convert string dtypes to actual dtypes
163+
dtype_fixed: Mapping[Hashable, Union[str, np.dtype, type]] = {
164+
col: dtype_map.get(dtype[col], str) for col in dtype
165+
}
166+
167+
df = pd.read_csv(io.BytesIO(data), dtype=dtype_fixed, na_values="")
154168

155169
# Ensure all column names are lower case.
156170
df.columns = [c.lower() for c in df.columns] # type: ignore
@@ -470,7 +484,12 @@ def _parse_aim_metadata(
470484
if isinstance(data, bytes):
471485
# Parse CSV data.
472486
df = pd.read_csv(
473-
io.BytesIO(data), dtype=self._aim_metadata_dtype, na_values=""
487+
io.BytesIO(data),
488+
dtype=cast(
489+
Mapping[Hashable, Union[str, type, np.dtype]],
490+
self._aim_metadata_dtype,
491+
),
492+
na_values="",
474493
)
475494

476495
# Ensure all column names are lower case.

malariagen_data/anoph/snp_frq.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33

44
import allel # type: ignore
55
import numpy as np
6+
import numpy.typing as npt
67
import pandas as pd
78
from numpydoc_decorator import doc # type: ignore
89
import xarray as xr
@@ -517,8 +518,8 @@ def snp_allele_frequencies_advanced(
517518

518519
# Set up main event variables.
519520
n_variants, n_cohorts = len(variant_position), len(df_cohorts)
520-
count = np.zeros((n_variants, n_cohorts), dtype=int)
521-
nobs = np.zeros((n_variants, n_cohorts), dtype=int)
521+
count: npt.NDArray[np.float64] = np.zeros((n_variants, n_cohorts), dtype=int)
522+
nobs: npt.NDArray[np.float64] = np.zeros((n_variants, n_cohorts), dtype=int)
522523

523524
# Build event count and nobs for each cohort.
524525
cohorts_iterator = self._progress(

malariagen_data/plasmodium.py

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
import json
2-
import os
32

43
import dask.array as da
54
import pandas as pd
@@ -60,7 +59,7 @@ def sample_metadata(self):
6059
One row per sample.
6160
"""
6261
if self._cache_sample_metadata is None:
63-
path = os.path.join(self._path, self.CONF["metadata_path"])
62+
path = f"{self._path}/{self.CONF['metadata_path']}"
6463
with self._fs.open(path) as f:
6564
self._cache_sample_metadata = pd.read_csv(f, sep="\t", na_values="")
6665
return self._cache_sample_metadata
@@ -75,7 +74,7 @@ def _open_variant_calls_zarr(self):
7574
7675
"""
7776
if self._cache_variant_calls_zarr is None:
78-
path = os.path.join(self._path, self.CONF["variant_calls_zarr_path"])
77+
path = f"{self._path}/{self.CONF['variant_calls_zarr_path']}"
7978
store = init_zarr_store(fs=self._fs, path=path)
8079
self._cache_variant_calls_zarr = zarr.open_consolidated(store=store)
8180
return self._cache_variant_calls_zarr
@@ -205,7 +204,7 @@ def open_genome(self):
205204
206205
"""
207206
if self._cache_genome is None:
208-
path = os.path.join(self._path, self.CONF["reference_path"])
207+
path = f"{self._path}/{self.CONF['reference_path']}"
209208
store = init_zarr_store(fs=self._fs, path=path)
210209
self._cache_genome = zarr.open_consolidated(store=store)
211210
return self._cache_genome
@@ -317,7 +316,7 @@ def genome_features(self, attributes=("ID", "Parent", "Name")):
317316
try:
318317
df = self._cache_genome_features[attributes]
319318
except KeyError:
320-
path = os.path.join(self._path, self.CONF["annotations_path"])
319+
path = f"{self._path}/{self.CONF['annotations_path']}"
321320
with self._fs.open(path, mode="rb") as f:
322321
df = read_gff3(f, compression="gzip")
323322
if attributes is not None:

tests/anoph/test_snp_frq.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -161,7 +161,7 @@ def test_snp_effects(fixture, api: AnophelesSnpFrequencyAnalysis):
161161

162162
# Check some values.
163163
assert np.all(df["contig"] == transcript["contig"])
164-
position = df["position"].values
164+
position = df["position"].to_numpy()
165165
assert np.all(position >= transcript["start"])
166166
assert np.all(position <= transcript["end"])
167167
assert np.all(position[1:] >= position[:-1])

0 commit comments

Comments
 (0)