Skip to content

Commit fb451be

Browse files
committed
Remove unused utils
1 parent 3922eff commit fb451be

File tree

2 files changed

+2
-285
lines changed

2 files changed

+2
-285
lines changed

bio2zarr/utils.py

Lines changed: 2 additions & 154 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,16 @@
1-
import itertools
21
import struct
3-
import tempfile
4-
import uuid
5-
from contextlib import contextmanager
6-
from typing import IO, Any, Dict, Hashable, Iterator, Optional, Sequence, TypeVar
7-
from urllib.parse import urlparse
2+
from typing import IO, Any, Dict, Optional, Sequence
83

94
import fsspec
10-
from numcodecs import Delta, PackBits
115

12-
from sgkit.typing import PathType
13-
14-
T = TypeVar("T")
6+
from bio2zarr.typing import PathType
157

168

179
def ceildiv(a: int, b: int) -> int:
1810
"""Safe integer ceil function"""
1911
return -(-a // b)
2012

2113

22-
# Based on https://dev.to/orenovadia/solution-chunked-iterator-python-riddle-3ple
23-
def chunks(iterator: Iterator[T], n: int) -> Iterator[Iterator[T]]:
24-
"""
25-
Convert an iterator into an iterator of iterators, where the inner iterators
26-
each return `n` items, except the last, which may return fewer.
27-
28-
For the special case of an empty iterator, an iterator of an empty iterator is
29-
returned.
30-
"""
31-
32-
empty_iterator = True
33-
for first in iterator: # take one item out (exits loop if `iterator` is empty)
34-
empty_iterator = False
35-
rest_of_chunk = itertools.islice(iterator, 0, n - 1)
36-
yield itertools.chain([first], rest_of_chunk) # concatenate the first item back
37-
if empty_iterator:
38-
yield iter([])
39-
40-
4114
def get_file_length(
4215
path: PathType, storage_options: Optional[Dict[str, str]] = None
4316
) -> int:
@@ -107,128 +80,3 @@ def open_gzip(path: PathType, storage_options: Optional[Dict[str, str]]) -> IO[A
10780
storage_options = storage_options or {}
10881
openfile: IO[Any] = fsspec.open(url, compression="gzip", **storage_options)
10982
return openfile
110-
111-
112-
def url_filename(url: str) -> str:
113-
"""Extract the filename from a URL"""
114-
filename: str = URL(url).name
115-
return filename
116-
117-
118-
def build_url(dir_url: PathType, child_path: str) -> str:
119-
"""Combine a URL for a directory with a child path"""
120-
url = URL(str(dir_url))
121-
# the division (/) operator discards query and fragment, so add them back
122-
return str((url / child_path).with_query(url.query).with_fragment(url.fragment))
123-
124-
125-
@contextmanager
126-
def temporary_directory(
127-
suffix: Optional[str] = None,
128-
prefix: Optional[str] = None,
129-
dir: Optional[PathType] = None,
130-
storage_options: Optional[Dict[str, str]] = None,
131-
retain_temp_files: Optional[bool] = None,
132-
) -> Iterator[str]:
133-
"""Create a temporary directory in a fsspec filesystem.
134-
135-
Parameters
136-
----------
137-
suffix : Optional[str], optional
138-
If not None, the name of the temporary directory will end with that suffix.
139-
prefix : Optional[str], optional
140-
If not None, the name of the temporary directory will start with that prefix.
141-
dir : Optional[PathType], optional
142-
If not None, the temporary directory will be created in that directory, otherwise
143-
the local filesystem directory returned by `tempfile.gettempdir()` will be used.
144-
The directory may be specified as any fsspec URL.
145-
storage_options : Optional[Dict[str, str]], optional
146-
Any additional parameters for the storage backend (see `fsspec.open`).
147-
retain_temp_files : Optional[bool], optional
148-
If True, the temporary directory will not be removed on exiting the context manager.
149-
Defaults to None, which means the directory will be removed.
150-
Yields
151-
-------
152-
Generator[str, None, None]
153-
A context manager yielding the fsspec URL to the created directory.
154-
"""
155-
156-
# Fill in defaults
157-
suffix = suffix or ""
158-
prefix = prefix or ""
159-
dir = dir or tempfile.gettempdir()
160-
storage_options = storage_options or {}
161-
162-
# Find the filesystem by looking at the URL scheme (protocol), empty means local filesystem
163-
protocol = urlparse(str(dir)).scheme
164-
fs = fsspec.filesystem(protocol, **storage_options)
165-
166-
# Construct a random directory name
167-
tempdir = build_url(dir, prefix + str(uuid.uuid4()) + suffix)
168-
try:
169-
fs.mkdir(tempdir)
170-
yield tempdir
171-
finally:
172-
# Remove the temporary directory on exiting the context manager
173-
if not retain_temp_files:
174-
fs.rm(tempdir, recursive=True)
175-
176-
177-
def get_default_vcf_encoding(ds, chunk_length, chunk_width, compressor):
178-
# Enforce uniform chunks in the variants dimension
179-
# Also chunk in the samples direction
180-
def get_chunk_size(dim: Hashable, size: int) -> int:
181-
if dim == "variants":
182-
return chunk_length
183-
elif dim == "samples":
184-
return chunk_width
185-
else:
186-
# Avoid chunk size of 0
187-
return max(size, 1)
188-
189-
default_encoding = {}
190-
for var in ds.data_vars:
191-
var_chunks = tuple(
192-
get_chunk_size(dim, size)
193-
for (dim, size) in zip(ds[var].dims, ds[var].shape)
194-
)
195-
default_encoding[var] = dict(chunks=var_chunks, compressor=compressor)
196-
197-
# Enable bit packing by default for boolean arrays
198-
if ds[var].dtype.kind == "b":
199-
default_encoding[var]["filters"] = [PackBits()]
200-
201-
# Position is monotonically increasing (within a contig) so benefits from delta encoding
202-
if var == "variant_position":
203-
default_encoding[var]["filters"] = [Delta(ds[var].dtype)]
204-
205-
return default_encoding
206-
207-
208-
def merge_encodings(
209-
default_encoding: Dict[str, Dict[str, Any]], overrides: Dict[str, Dict[str, Any]]
210-
) -> Dict[str, Dict[str, Any]]:
211-
"""Merge a dictionary of dictionaries specifying encodings with another dictionary of dictionaries of overriding encodings.
212-
213-
Parameters
214-
----------
215-
default_encoding : Dict[str, Dict[str, Any]]
216-
The default encoding dictionary.
217-
overrides : Dict[str, Dict[str, Any]]
218-
A dictionary containing selective overrides.
219-
220-
Returns
221-
-------
222-
Dict[str, Dict[str, Any]]
223-
The merged encoding dictionary
224-
"""
225-
merged = {}
226-
for var, d in default_encoding.items():
227-
if var in overrides:
228-
merged[var] = {**d, **overrides[var]}
229-
else:
230-
merged[var] = d
231-
for var, d in overrides.items():
232-
if var not in merged:
233-
merged[var] = d
234-
return merged

tests/test_utils.py

Lines changed: 0 additions & 131 deletions
This file was deleted.

0 commit comments

Comments
 (0)