Skip to content

Commit 6ae749e

Browse files
Merge pull request #31 from SuperDARNCanada/develop
Release: v0.7.0
2 parents dd317c6 + d634643 commit 6ae749e

File tree

11 files changed

+127
-58
lines changed

11 files changed

+127
-58
lines changed

.github/workflows/CI.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,9 +148,9 @@ jobs:
148148
strategy:
149149
matrix:
150150
platform:
151-
- runner: macos-13
151+
- runner: macos-15-intel
152152
target: x86_64
153-
- runner: macos-14
153+
- runner: macos-latest
154154
target: aarch64
155155
steps:
156156
- uses: actions/checkout@v4

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,6 @@ __pycache__/
77
# Built files
88
target/
99

10+
# Rust dependency versions
11+
Cargo.lock
12+

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "darn-dmap"
3-
version = "0.6.0"
3+
version = "0.7.0"
44
edition = "2021"
55
rust-version = "1.63.0"
66
authors = ["Remington Rohel"]

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "maturin"
44

55
[project]
66
name = "darn-dmap"
7-
version = "0.6.0"
7+
version = "0.7.0"
88
requires-python = ">=3.8"
99
authors = [
1010
{ name = "Remington Rohel" }

python/README.md

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,8 @@ fitacf_file = "path/to/file.bz2"
8080
data, _ = dmap.read_fitacf(fitacf_file)
8181
dmap.write_fitacf(data, "temp.fitacf.bz2")
8282
```
83-
will read in the compressed file, then also write out a new compressed file. Note that compression on the writing side
84-
will only be done when writing to file, as the detection is done based on the file extension of the output file.
83+
will read in the compressed file, then also write out a new compressed file. You can also pass the argument `bz2=True`
84+
to compress with `bzip2` regardless of file extension, or even to return compressed byte objects.
8585

8686
### Generic I/O
8787
dmap supports generic DMAP I/O, without verifying the field names and types. The file must still
@@ -162,10 +162,10 @@ assert binary_data == raw_bytes
162162
```
163163
As a note, this binary data can be compressed ~2x typically using zlib, or with another compression utility. This is quite
164164
useful if sending data over a network where speed and bandwidth must be considered. Note that the binary writing functions
165-
don't compress automatically, an external package like `zlib` or `bzip2` must be used.
165+
can compress with bzip2 by passing `bz2=True` as an argument.
166166

167167
### File "sniffing"
168-
If you only want to inspect a file, without actually needing access to all of the data, you can use the `read_[type]`
168+
If you only want to inspect a file, without actually needing access to all the data, you can use the `read_[type]`
169169
functions in `"sniff"` mode. This will only read in the first record from a file, and works on both compressed and
170170
non-compressed files. Note that this mode does not work with bytes objects directly.
171171

@@ -174,3 +174,9 @@ import dmap
174174
path = "path/to/file"
175175
first_rec = dmap.read_dmap(path, mode="sniff")
176176
```
177+
178+
### Reading only metadata fields
179+
Each DMAP format consists of metadata and data fields. You can read only the metadata fields by passing `mode="metadata"`
180+
to any of the reading functions. Note that the generic read function `read_dmap` will return all fields, as it by nature
181+
has no knowledge of the underlying fields. Note also that the read functions operating on a file still read the entire
182+
file into memory first, so reading only the metadata does not significantly decrease read times.

python/dmap/_wrapper.py

Lines changed: 33 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ def read_dispatcher(
7373

7474

7575
def write_dispatcher(
76-
source: list[dict], fmt: str, outfile: Union[None, str]
76+
source: list[dict], fmt: str, outfile: Union[None, str], bz2: bool,
7777
) -> Union[None, bytes]:
7878
"""
7979
Writes DMAP data from `source` to either a `bytes` object or to `outfile`.
@@ -88,15 +88,17 @@ def write_dispatcher(
8888
If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path
8989
and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be
9090
compressed using bzip2.
91+
bz2: bool
92+
If `True`, the data will be compressed with `bzip2`.
9193
"""
9294
if fmt not in ["dmap", "iqdat", "rawacf", "fitacf", "grid", "map", "snd"]:
9395
raise ValueError(
9496
f"invalid fmt `{fmt}`: expected one of ['dmap', 'iqdat', 'rawacf', 'fitacf', 'grid', 'map', 'snd']"
9597
)
9698
if outfile is None:
97-
return getattr(dmap_rs, f"write_{fmt}_bytes")(source)
99+
return getattr(dmap_rs, f"write_{fmt}_bytes")(source, bz2=bz2)
98100
elif isinstance(outfile, str):
99-
getattr(dmap_rs, f"write_{fmt}")(source, outfile)
101+
getattr(dmap_rs, f"write_{fmt}")(source, outfile, bz2=bz2)
100102
else:
101103
raise TypeError(
102104
f"invalid type for `outfile` {type(outfile)}: expected `str` or `None`"
@@ -308,7 +310,7 @@ def read_snd(
308310

309311

310312
def write_dmap(
311-
source: list[dict], outfile: Union[None, str] = None
313+
source: list[dict], outfile: Union[None, str] = None, bz2: bool = False,
312314
) -> Union[None, bytes]:
313315
"""
314316
Writes DMAP data from `source` to either a `bytes` object or to `outfile`.
@@ -321,12 +323,14 @@ def write_dmap(
321323
If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path
322324
and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be
323325
compressed using bzip2.
326+
bz2: bool
327+
If `True`, the data will be compressed with `bzip2`.
324328
"""
325-
return write_dispatcher(source, "dmap", outfile)
329+
return write_dispatcher(source, "dmap", outfile, bz2=bz2)
326330

327331

328332
def write_iqdat(
329-
source: list[dict], outfile: Union[None, str] = None
333+
source: list[dict], outfile: Union[None, str] = None, bz2: bool = False,
330334
) -> Union[None, bytes]:
331335
"""
332336
Writes IQDAT data from `source` to either a `bytes` object or to `outfile`.
@@ -339,12 +343,14 @@ def write_iqdat(
339343
If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path
340344
and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be
341345
compressed using bzip2.
346+
bz2: bool
347+
If `True`, the data will be compressed with `bzip2`.
342348
"""
343-
return write_dispatcher(source, "iqdat", outfile)
349+
return write_dispatcher(source, "iqdat", outfile, bz2=bz2)
344350

345351

346352
def write_rawacf(
347-
source: list[dict], outfile: Union[None, str] = None
353+
source: list[dict], outfile: Union[None, str] = None, bz2: bool = False,
348354
) -> Union[None, bytes]:
349355
"""
350356
Writes RAWACF data from `source` to either a `bytes` object or to `outfile`.
@@ -357,12 +363,14 @@ def write_rawacf(
357363
If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path
358364
and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be
359365
compressed using bzip2.
366+
bz2: bool
367+
If `True`, the data will be compressed with `bzip2`.
360368
"""
361-
return write_dispatcher(source, "rawacf", outfile)
369+
return write_dispatcher(source, "rawacf", outfile, bz2=bz2)
362370

363371

364372
def write_fitacf(
365-
source: list[dict], outfile: Union[None, str] = None
373+
source: list[dict], outfile: Union[None, str] = None, bz2: bool = False,
366374
) -> Union[None, bytes]:
367375
"""
368376
Writes FITACF data from `source` to either a `bytes` object or to `outfile`.
@@ -375,12 +383,14 @@ def write_fitacf(
375383
If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path
376384
and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be
377385
compressed using bzip2.
386+
bz2: bool
387+
If `True`, the data will be compressed with `bzip2`.
378388
"""
379-
return write_dispatcher(source, "fitacf", outfile)
389+
return write_dispatcher(source, "fitacf", outfile, bz2=bz2)
380390

381391

382392
def write_grid(
383-
source: list[dict], outfile: Union[None, str] = None
393+
source: list[dict], outfile: Union[None, str] = None, bz2: bool = False,
384394
) -> Union[None, bytes]:
385395
"""
386396
Writes GRID data from `source` to either a `bytes` object or to `outfile`.
@@ -393,12 +403,14 @@ def write_grid(
393403
If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path
394404
and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be
395405
compressed using bzip2.
406+
bz2: bool
407+
If `True`, the data will be compressed with `bzip2`.
396408
"""
397-
return write_dispatcher(source, "grid", outfile)
409+
return write_dispatcher(source, "grid", outfile, bz2=bz2)
398410

399411

400412
def write_map(
401-
source: list[dict], outfile: Union[None, str] = None
413+
source: list[dict], outfile: Union[None, str] = None, bz2: bool = False,
402414
) -> Union[None, bytes]:
403415
"""
404416
Writes MAP data from `source` to either a `bytes` object or to `outfile`.
@@ -411,12 +423,14 @@ def write_map(
411423
If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path
412424
and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be
413425
compressed using bzip2.
426+
bz2: bool
427+
If `True`, the data will be compressed with `bzip2`.
414428
"""
415-
return write_dispatcher(source, "map", outfile)
429+
return write_dispatcher(source, "map", outfile, bz2=bz2)
416430

417431

418432
def write_snd(
419-
source: list[dict], outfile: Union[None, str] = None
433+
source: list[dict], outfile: Union[None, str] = None, bz2: bool = False,
420434
) -> Union[None, bytes]:
421435
"""
422436
Writes SND data from `source` to either a `bytes` object or to `outfile`.
@@ -429,5 +443,7 @@ def write_snd(
429443
If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path
430444
and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be
431445
compressed using bzip2.
446+
bz2: bool
447+
If `True`, the data will be compressed with `bzip2`.
432448
"""
433-
return write_dispatcher(source, "snd", outfile)
449+
return write_dispatcher(source, "snd", outfile, bz2=bz2)

src/compression.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,29 @@
22
//!
33
//! Currently only supports bz2 compression detection.
44
5+
use bzip2::read::BzEncoder;
6+
use bzip2::Compression;
57
use std::io::{Chain, Cursor, Error, Read};
68

9+
/// Compress bytes using [`BzEncoder`].
10+
///
11+
/// # Errors
12+
/// See [`Read::read_to_end`].
13+
pub(crate) fn compress_bz2(bytes: &[u8]) -> Result<Vec<u8>, Error> {
14+
let mut out_bytes: Vec<u8> = vec![];
15+
let mut compressor = BzEncoder::new(bytes, Compression::best());
16+
compressor.read_to_end(&mut out_bytes)?;
17+
18+
Ok(out_bytes)
19+
}
20+
721
type PartiallyReadStream<T> = Chain<Cursor<[u8; 3]>, T>;
822

923
/// Detects bz2 compression on the input `stream`. Returns a reader
1024
/// which includes all data from `stream`.
1125
///
1226
/// # Errors
13-
/// See [`std::io::Read::read_exact`].
27+
/// See [`Read::read_exact`].
1428
pub(crate) fn detect_bz2<T>(mut stream: T) -> Result<(bool, PartiallyReadStream<T>), Error>
1529
where
1630
T: for<'a> Read,

src/io.rs

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,43 @@
11
//! Utility functions for file operations.
22
3-
use bzip2::{read::BzEncoder, Compression};
3+
use crate::compression::compress_bz2;
44
use std::ffi::OsStr;
55
use std::fs::{File, OpenOptions};
6-
use std::io::{Read, Write};
6+
use std::io::Write;
77
use std::path::Path;
88

99
/// Write bytes to file.
1010
///
1111
/// Ordinarily, this function opens the file in `append` mode. If the extension of `outfile` is
12-
/// `.bz2`, the bytes will be compressed using bzip2 before being written.
12+
/// `.bz2`, or if the `bz2` argument is `true`, the bytes will be compressed using bzip2 before being written.
1313
///
1414
/// # Errors
15-
/// If opening the file in append mode is not possible (permissions, path doesn't exist, etc.). See [`std::fs::File::open`].
15+
/// If opening the file in append mode is not possible (permissions, path doesn't exist, etc.). See [`OpenOptions::open`].
1616
///
17-
/// If an error is encountered when compressing the bytes.
18-
///
19-
/// If an error is encountered when writing the bytes to the filesystem. See [`std::io::Write::write_all`]
17+
/// If an error is encountered when writing the bytes to the filesystem. See [`Write::write_all`]
2018
pub(crate) fn bytes_to_file<P: AsRef<Path>>(
2119
bytes: Vec<u8>,
2220
outfile: P,
21+
bz2: bool,
2322
) -> Result<(), std::io::Error> {
24-
let mut out_bytes: Vec<u8> = vec![];
2523
let compress_file: bool =
26-
matches!(outfile.as_ref().extension(), Some(ext) if ext == OsStr::new("bz2"));
24+
bz2 || matches!(outfile.as_ref().extension(), Some(ext) if ext == OsStr::new("bz2"));
2725
let mut file: File = OpenOptions::new().append(true).create(true).open(outfile)?;
2826
if compress_file {
29-
let mut compressor = BzEncoder::new(bytes.as_slice(), Compression::best());
30-
compressor.read_to_end(&mut out_bytes)?;
27+
write_bytes_bz2(bytes, &mut file)
3128
} else {
32-
out_bytes = bytes;
29+
file.write_all(&bytes)
3330
}
31+
}
3432

35-
file.write_all(&out_bytes)
33+
/// Writes `bytes` to a [`Write`] implementor, compressing with [`bzip2::read::BzEncoder`] first.
34+
///
35+
/// # Errors
36+
/// From [`compress_bz2`] or [`Write::write_all`].
37+
pub(crate) fn write_bytes_bz2<W: Write>(
38+
bytes: Vec<u8>,
39+
writer: &mut W,
40+
) -> Result<(), std::io::Error> {
41+
let out_bytes: Vec<u8> = compress_bz2(&bytes)?;
42+
writer.write_all(&out_bytes)
3643
}

src/lib.rs

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@
6262
//!
6363
//! // Write the records to a file
6464
//! let out_path = PathBuf::from("tests/test_files/output.rawacf");
65-
//! RawacfRecord::write_to_file(&rawacf_data, &out_path)?;
65+
//! RawacfRecord::write_to_file(&rawacf_data, &out_path, false)?;
6666
//! # std::fs::remove_file(out_path)?;
6767
//! # Ok(())
6868
//! # }
@@ -121,9 +121,10 @@ macro_rules! write_rust {
121121
pub fn [< try_write_ $type >]<P: AsRef<Path>>(
122122
recs: Vec<IndexMap<String, DmapField>>,
123123
outfile: P,
124+
bz2: bool,
124125
) -> Result<(), DmapError> {
125126
let bytes = [< $type:camel Record >]::try_into_bytes(recs)?;
126-
crate::io::bytes_to_file(bytes, outfile).map_err(DmapError::from)
127+
crate::io::bytes_to_file(bytes, outfile, bz2).map_err(DmapError::from)
127128
}
128129
}
129130
}
@@ -308,9 +309,14 @@ read_py!(
308309
/// does not know that typically `stid` is two bytes.
309310
#[pyfunction]
310311
#[pyo3(name = "write_dmap")]
311-
#[pyo3(text_signature = "(recs: list[dict], outfile: str, /)")]
312-
fn write_dmap_py(recs: Vec<IndexMap<String, DmapField>>, outfile: PathBuf) -> PyResult<()> {
313-
try_write_dmap(recs, &outfile).map_err(PyErr::from)
312+
#[pyo3(signature = (recs, outfile, /, bz2))]
313+
#[pyo3(text_signature = "(recs: list[dict], outfile: str, /, bz2: bool = False)")]
314+
fn write_dmap_py(
315+
recs: Vec<IndexMap<String, DmapField>>,
316+
outfile: PathBuf,
317+
bz2: bool,
318+
) -> PyResult<()> {
319+
try_write_dmap(recs, &outfile, bz2).map_err(PyErr::from)
314320
}
315321

316322
/// Checks that a list of dictionaries contains valid DMAP records, then converts them to bytes.
@@ -321,9 +327,17 @@ fn write_dmap_py(recs: Vec<IndexMap<String, DmapField>>, outfile: PathBuf) -> Py
321327
/// does not know that typically `stid` is two bytes.
322328
#[pyfunction]
323329
#[pyo3(name = "write_dmap_bytes")]
324-
#[pyo3(text_signature = "(recs: list[dict], /)")]
325-
fn write_dmap_bytes_py(py: Python, recs: Vec<IndexMap<String, DmapField>>) -> PyResult<Py<PyAny>> {
326-
let bytes = DmapRecord::try_into_bytes(recs).map_err(PyErr::from)?;
330+
#[pyo3(signature = (recs, /, bz2))]
331+
#[pyo3(text_signature = "(recs: list[dict], /, bz2: bool = False)")]
332+
fn write_dmap_bytes_py(
333+
py: Python,
334+
recs: Vec<IndexMap<String, DmapField>>,
335+
bz2: bool,
336+
) -> PyResult<Py<PyAny>> {
337+
let mut bytes = DmapRecord::try_into_bytes(recs).map_err(PyErr::from)?;
338+
if bz2 {
339+
bytes = compression::compress_bz2(&bytes).map_err(PyErr::from)?;
340+
}
327341
Ok(PyBytes::new(py, &bytes).into())
328342
}
329343

@@ -334,18 +348,23 @@ macro_rules! write_py {
334348
#[doc = "Checks that a list of dictionaries contains valid `" $name:upper "` records, then appends to outfile." ]
335349
#[pyfunction]
336350
#[pyo3(name = $fn_name)]
337-
#[pyo3(text_signature = "(recs: list[dict], outfile: str, /)")]
338-
fn [< write_ $name _py >](recs: Vec<IndexMap<String, DmapField>>, outfile: PathBuf) -> PyResult<()> {
339-
[< try_write_ $name >](recs, &outfile).map_err(PyErr::from)
351+
#[pyo3(signature = (recs, outfile, /, bz2))]
352+
#[pyo3(text_signature = "(recs: list[dict], outfile: str, /, bz2: bool = False)")]
353+
fn [< write_ $name _py >](recs: Vec<IndexMap<String, DmapField>>, outfile: PathBuf, bz2: bool) -> PyResult<()> {
354+
[< try_write_ $name >](recs, &outfile, bz2).map_err(PyErr::from)
340355
}
341356

342357
#[doc = "Checks that a list of dictionaries contains valid `" $name:upper "` records, then converts them to bytes." ]
343358
#[doc = "Returns `list[bytes]`, one entry per record." ]
344359
#[pyfunction]
345360
#[pyo3(name = $bytes_name)]
346-
#[pyo3(text_signature = "(recs: list[dict], /)")]
347-
fn [< write_ $name _bytes_py >](py: Python, recs: Vec<IndexMap<String, DmapField>>) -> PyResult<Py<PyAny>> {
348-
let bytes = [< $name:camel Record >]::try_into_bytes(recs).map_err(PyErr::from)?;
361+
#[pyo3(signature = (recs, /, bz2))]
362+
#[pyo3(text_signature = "(recs: list[dict], /, bz2: bool = False)")]
363+
fn [< write_ $name _bytes_py >](py: Python, recs: Vec<IndexMap<String, DmapField>>, bz2: bool) -> PyResult<Py<PyAny>> {
364+
let mut bytes = [< $name:camel Record >]::try_into_bytes(recs).map_err(PyErr::from)?;
365+
if bz2 {
366+
bytes = compression::compress_bz2(&bytes).map_err(PyErr::from)?;
367+
}
349368
Ok(PyBytes::new(py, &bytes).into())
350369
}
351370
}

0 commit comments

Comments
 (0)