Skip to content

Commit a39a3c1

Browse files
authored
Merge pull request #786 from malariagen/fix-windows-path-mainrepo
Fix Windows path compatibility in cloud storage URLs
2 parents a8009a8 + eb7c5d5 commit a39a3c1

File tree

6 files changed

+43
-14
lines changed

6 files changed

+43
-14
lines changed

malariagen_data/anoph/frq_base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ def plot_frequencies_heatmap(
210210

211211
# Indexing.
212212
if index is None:
213-
index = list(df.index.names)
213+
index = [str(name) for name in df.index.names]
214214
df = df.reset_index().copy()
215215
if isinstance(index, list):
216216
index_col = (

malariagen_data/anoph/sample_metadata.py

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,18 @@
11
import io
22
from itertools import cycle
3-
from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence, Tuple, Union
3+
from typing import (
4+
Any,
5+
Callable,
6+
Dict,
7+
List,
8+
Mapping,
9+
Optional,
10+
Sequence,
11+
Tuple,
12+
Union,
13+
Hashable,
14+
cast,
15+
)
416

517
import ipyleaflet # type: ignore
618
import numpy as np
@@ -39,7 +51,7 @@ def __init__(
3951
# data resources, and so column names and dtype need to be
4052
# passed in as parameters.
4153
self._aim_metadata_columns: Optional[List[str]] = None
42-
self._aim_metadata_dtype: Dict[str, Any] = dict()
54+
self._aim_metadata_dtype: Dict[str, Union[str, type, np.dtype]] = dict()
4355
if isinstance(aim_metadata_dtype, Mapping):
4456
self._aim_metadata_columns = list(aim_metadata_dtype.keys())
4557
self._aim_metadata_dtype.update(aim_metadata_dtype)
@@ -140,7 +152,19 @@ def _parse_general_metadata(
140152
"longitude": "float64",
141153
"sex_call": "object",
142154
}
143-
df = pd.read_csv(io.BytesIO(data), dtype=dtype, na_values="")
155+
# Mapping of string dtypes to actual dtypes
156+
dtype_map = {
157+
"object": str,
158+
"int64": np.int64,
159+
"float64": np.float64,
160+
}
161+
162+
# Convert string dtypes to actual dtypes
163+
dtype_fixed: Mapping[Hashable, Union[str, np.dtype, type]] = {
164+
col: dtype_map.get(dtype[col], str) for col in dtype
165+
}
166+
167+
df = pd.read_csv(io.BytesIO(data), dtype=dtype_fixed, na_values="")
144168

145169
# Ensure all column names are lower case.
146170
df.columns = [c.lower() for c in df.columns] # type: ignore
@@ -460,7 +484,12 @@ def _parse_aim_metadata(
460484
if isinstance(data, bytes):
461485
# Parse CSV data.
462486
df = pd.read_csv(
463-
io.BytesIO(data), dtype=self._aim_metadata_dtype, na_values=""
487+
io.BytesIO(data),
488+
dtype=cast(
489+
Mapping[Hashable, Union[str, type, np.dtype]],
490+
self._aim_metadata_dtype,
491+
),
492+
na_values="",
464493
)
465494

466495
# Ensure all column names are lower case.

malariagen_data/anoph/snp_frq.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import allel # type: ignore
55
import numpy as np
6+
import numpy.typing as npt
67
import pandas as pd
78
from numpydoc_decorator import doc # type: ignore
89
import xarray as xr
@@ -518,8 +519,8 @@ def snp_allele_frequencies_advanced(
518519

519520
# Set up main event variables.
520521
n_variants, n_cohorts = len(variant_position), len(df_cohorts)
521-
count = np.zeros((n_variants, n_cohorts), dtype=int)
522-
nobs = np.zeros((n_variants, n_cohorts), dtype=int)
522+
count: npt.NDArray[np.float64] = np.zeros((n_variants, n_cohorts), dtype=int)
523+
nobs: npt.NDArray[np.float64] = np.zeros((n_variants, n_cohorts), dtype=int)
523524

524525
# Build event count and nobs for each cohort.
525526
cohorts_iterator = self._progress(

malariagen_data/plasmodium.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import json
2-
import os
32

43
import dask.array as da
54
import pandas as pd
@@ -60,7 +59,7 @@ def sample_metadata(self):
6059
One row per sample.
6160
"""
6261
if self._cache_sample_metadata is None:
63-
path = os.path.join(self._path, self.CONF["metadata_path"])
62+
path = f"{self._path}/{self.CONF['metadata_path']}"
6463
with self._fs.open(path) as f:
6564
self._cache_sample_metadata = pd.read_csv(f, sep="\t", na_values="")
6665
return self._cache_sample_metadata
@@ -75,7 +74,7 @@ def _open_variant_calls_zarr(self):
7574
7675
"""
7776
if self._cache_variant_calls_zarr is None:
78-
path = os.path.join(self._path, self.CONF["variant_calls_zarr_path"])
77+
path = f"{self._path}/{self.CONF['variant_calls_zarr_path']}"
7978
store = init_zarr_store(fs=self._fs, path=path)
8079
self._cache_variant_calls_zarr = zarr.open_consolidated(store=store)
8180
return self._cache_variant_calls_zarr
@@ -205,7 +204,7 @@ def open_genome(self):
205204
206205
"""
207206
if self._cache_genome is None:
208-
path = os.path.join(self._path, self.CONF["reference_path"])
207+
path = f"{self._path}/{self.CONF['reference_path']}"
209208
store = init_zarr_store(fs=self._fs, path=path)
210209
self._cache_genome = zarr.open_consolidated(store=store)
211210
return self._cache_genome
@@ -317,7 +316,7 @@ def genome_features(self, attributes=("ID", "Parent", "Name")):
317316
try:
318317
df = self._cache_genome_features[attributes]
319318
except KeyError:
320-
path = os.path.join(self._path, self.CONF["annotations_path"])
319+
path = f"{self._path}/{self.CONF['annotations_path']}"
321320
with self._fs.open(path, mode="rb") as f:
322321
df = read_gff3(f, compression="gzip")
323322
if attributes is not None:

tests/anoph/test_hap_data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -605,7 +605,7 @@ def test_haplotypes_virtual_contigs(
605605

606606
# Test with region.
607607
seq = api.genome_sequence(region=chrom)
608-
start, stop = sorted(np.random.randint(low=1, high=len(seq), size=2))
608+
start, stop = sorted(map(int, np.random.randint(low=1, high=len(seq), size=2)))
609609
region = f"{chrom}:{start:,}-{stop:,}"
610610

611611
# Standard checks.

tests/anoph/test_snp_frq.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ def test_snp_effects(fixture, api: AnophelesSnpFrequencyAnalysis):
153153

154154
# Check some values.
155155
assert np.all(df["contig"] == transcript["contig"])
156-
position = df["position"].values
156+
position = df["position"].to_numpy()
157157
assert np.all(position >= transcript["start"])
158158
assert np.all(position <= transcript["end"])
159159
assert np.all(position[1:] >= position[:-1])

0 commit comments

Comments
 (0)