Merge pull request #31 from SuperDARNCanada/develop

RemingtonRohel · web-flow · commit 6ae749eed6d5 · 2026-01-23T18:13:51.000Z
Release: v0.7.0
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -148,9 +148,9 @@ jobs:
     strategy:
       matrix:
         platform:
-          - runner: macos-13
+          - runner: macos-15-intel
             target: x86_64
-          - runner: macos-14
+          - runner: macos-latest
             target: aarch64
     steps:
       - uses: actions/checkout@v4
diff --git a/.gitignore b/.gitignore
@@ -7,3 +7,6 @@ __pycache__/
 # Built files
 target/
 
+# Rust dependency versions
+Cargo.lock
+
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "darn-dmap"
-version = "0.6.0"
+version = "0.7.0"
 edition = "2021"
 rust-version = "1.63.0"
 authors = ["Remington Rohel"]
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "maturin"
 
 [project]
 name = "darn-dmap"
-version = "0.6.0"
+version = "0.7.0"
 requires-python = ">=3.8"
 authors = [
     { name = "Remington Rohel" }
diff --git a/python/README.md b/python/README.md
@@ -80,8 +80,8 @@ fitacf_file = "path/to/file.bz2"
 data, _ = dmap.read_fitacf(fitacf_file)
 dmap.write_fitacf(data, "temp.fitacf.bz2")
 ```
-will read in the compressed file, then also write out a new compressed file. Note that compression on the writing side
-will only be done when writing to file, as the detection is done based on the file extension of the output file.
+will read in the compressed file, then also write out a new compressed file. You can also pass the argument `bz2=True`
+to compress with `bzip2` regardless of the file extension, or to get compressed `bytes` back when no output file is given.
 
 ### Generic I/O
 dmap supports generic DMAP I/O, without verifying the field names and types. The file must still
@@ -162,10 +162,10 @@ assert binary_data == raw_bytes
 ```
 As a note, this binary data can be compressed ~2x typically using zlib, or with another compression utility. This is quite 
 useful if sending data over a network where speed and bandwidth must be considered. Note that the binary writing functions
-don't compress automatically, an external package like `zlib` or `bzip2` must be used.
+can compress their output with bzip2 if you pass `bz2=True` as an argument.
 
 ### File "sniffing"
-If you only want to inspect a file, without actually needing access to all of the data, you can use the `read_[type]`
+If you only want to inspect a file, without actually needing access to all the data, you can use the `read_[type]`
 functions in `"sniff"` mode. This will only read in the first record from a file, and works on both compressed and 
 non-compressed files. Note that this mode does not work with bytes objects directly.
 
@@ -174,3 +174,9 @@ import dmap
 path = "path/to/file"
 first_rec = dmap.read_dmap(path, mode="sniff")
 ```
+
+### Reading only metadata fields
+Each DMAP format consists of metadata and data fields. You can read only the metadata fields by passing `mode="metadata"`
+to any of the reading functions. Note that the generic read function `read_dmap` will return all fields, as it by nature
+has no knowledge of the underlying fields. Note also that the read functions operating on a file still read the entire
+file into memory first, so reading only the metadata does not significantly decrease read times.
diff --git a/python/dmap/_wrapper.py b/python/dmap/_wrapper.py
@@ -73,7 +73,7 @@ def read_dispatcher(
 
 
 def write_dispatcher(
-    source: list[dict], fmt: str, outfile: Union[None, str]
+    source: list[dict], fmt: str, outfile: Union[None, str], bz2: bool,
 ) -> Union[None, bytes]:
     """
     Writes DMAP data from `source` to either a `bytes` object or to `outfile`.
@@ -88,15 +88,17 @@ def write_dispatcher(
         If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path
         and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be
         compressed using bzip2.
+    bz2: bool
+        If `True`, the data will be compressed with `bzip2`, even if `outfile` is `None` or lacks the `.bz2` extension.
     """
     if fmt not in ["dmap", "iqdat", "rawacf", "fitacf", "grid", "map", "snd"]:
         raise ValueError(
             f"invalid fmt `{fmt}`: expected one of ['dmap', 'iqdat', 'rawacf', 'fitacf', 'grid', 'map', 'snd']"
         )
     if outfile is None:
-        return getattr(dmap_rs, f"write_{fmt}_bytes")(source)
+        return getattr(dmap_rs, f"write_{fmt}_bytes")(source, bz2=bz2)
     elif isinstance(outfile, str):
-        getattr(dmap_rs, f"write_{fmt}")(source, outfile)
+        getattr(dmap_rs, f"write_{fmt}")(source, outfile, bz2=bz2)
     else:
         raise TypeError(
             f"invalid type for `outfile` {type(outfile)}: expected `str` or `None`"
@@ -308,7 +310,7 @@ def read_snd(
 
 
 def write_dmap(
-    source: list[dict], outfile: Union[None, str] = None
+    source: list[dict], outfile: Union[None, str] = None, bz2: bool = False,
 ) -> Union[None, bytes]:
     """
     Writes DMAP data from `source` to either a `bytes` object or to `outfile`.
@@ -321,12 +323,14 @@ def write_dmap(
         If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path
         and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be
         compressed using bzip2.
+    bz2: bool
+        If `True`, the data will be compressed with `bzip2`, even if `outfile` is `None` or lacks the `.bz2` extension.
     """
-    return write_dispatcher(source, "dmap", outfile)
+    return write_dispatcher(source, "dmap", outfile, bz2=bz2)
 
 
 def write_iqdat(
-    source: list[dict], outfile: Union[None, str] = None
+    source: list[dict], outfile: Union[None, str] = None, bz2: bool = False,
 ) -> Union[None, bytes]:
     """
     Writes IQDAT data from `source` to either a `bytes` object or to `outfile`.
@@ -339,12 +343,14 @@ def write_iqdat(
         If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path
         and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be
         compressed using bzip2.
+    bz2: bool
+        If `True`, the data will be compressed with `bzip2`, even if `outfile` is `None` or lacks the `.bz2` extension.
     """
-    return write_dispatcher(source, "iqdat", outfile)
+    return write_dispatcher(source, "iqdat", outfile, bz2=bz2)
 
 
 def write_rawacf(
-    source: list[dict], outfile: Union[None, str] = None
+    source: list[dict], outfile: Union[None, str] = None, bz2: bool = False,
 ) -> Union[None, bytes]:
     """
     Writes RAWACF data from `source` to either a `bytes` object or to `outfile`.
@@ -357,12 +363,14 @@ def write_rawacf(
         If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path
         and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be
         compressed using bzip2.
+    bz2: bool
+        If `True`, the data will be compressed with `bzip2`, even if `outfile` is `None` or lacks the `.bz2` extension.
     """
-    return write_dispatcher(source, "rawacf", outfile)
+    return write_dispatcher(source, "rawacf", outfile, bz2=bz2)
 
 
 def write_fitacf(
-    source: list[dict], outfile: Union[None, str] = None
+    source: list[dict], outfile: Union[None, str] = None, bz2: bool = False,
 ) -> Union[None, bytes]:
     """
     Writes FITACF data from `source` to either a `bytes` object or to `outfile`.
@@ -375,12 +383,14 @@ def write_fitacf(
         If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path
         and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be
         compressed using bzip2.
+    bz2: bool
+        If `True`, the data will be compressed with `bzip2`, even if `outfile` is `None` or lacks the `.bz2` extension.
     """
-    return write_dispatcher(source, "fitacf", outfile)
+    return write_dispatcher(source, "fitacf", outfile, bz2=bz2)
 
 
 def write_grid(
-    source: list[dict], outfile: Union[None, str] = None
+    source: list[dict], outfile: Union[None, str] = None, bz2: bool = False,
 ) -> Union[None, bytes]:
     """
     Writes GRID data from `source` to either a `bytes` object or to `outfile`.
@@ -393,12 +403,14 @@ def write_grid(
         If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path
         and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be
         compressed using bzip2.
+    bz2: bool
+        If `True`, the data will be compressed with `bzip2`, even if `outfile` is `None` or lacks the `.bz2` extension.
     """
-    return write_dispatcher(source, "grid", outfile)
+    return write_dispatcher(source, "grid", outfile, bz2=bz2)
 
 
 def write_map(
-    source: list[dict], outfile: Union[None, str] = None
+    source: list[dict], outfile: Union[None, str] = None, bz2: bool = False,
 ) -> Union[None, bytes]:
     """
     Writes MAP data from `source` to either a `bytes` object or to `outfile`.
@@ -411,12 +423,14 @@ def write_map(
         If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path
         and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be
         compressed using bzip2.
+    bz2: bool
+        If `True`, the data will be compressed with `bzip2`, even if `outfile` is `None` or lacks the `.bz2` extension.
     """
-    return write_dispatcher(source, "map", outfile)
+    return write_dispatcher(source, "map", outfile, bz2=bz2)
 
 
 def write_snd(
-    source: list[dict], outfile: Union[None, str] = None
+    source: list[dict], outfile: Union[None, str] = None, bz2: bool = False,
 ) -> Union[None, bytes]:
     """
     Writes SND data from `source` to either a `bytes` object or to `outfile`.
@@ -429,5 +443,7 @@ def write_snd(
         If `None`, returns the data as a `bytes` object. If this is a string, then this is interpreted as a path
         and data will be written to the filesystem. If the file ends in the `.bz2` extension, the data will be
         compressed using bzip2.
+    bz2: bool
+        If `True`, the data will be compressed with `bzip2`, even if `outfile` is `None` or lacks the `.bz2` extension.
     """
-    return write_dispatcher(source, "snd", outfile)
+    return write_dispatcher(source, "snd", outfile, bz2=bz2)
diff --git a/src/compression.rs b/src/compression.rs
@@ -2,15 +2,29 @@
 //!
 //! Currently only supports bz2 compression detection.
 
+use bzip2::read::BzEncoder;
+use bzip2::Compression;
 use std::io::{Chain, Cursor, Error, Read};
 
+/// Compresses `bytes` with a [`BzEncoder`] at [`Compression::best`] and returns the compressed bytes.
+///
+/// # Errors
+/// Returns any I/O error raised while draining the encoder. See [`Read::read_to_end`].
+pub(crate) fn compress_bz2(bytes: &[u8]) -> Result<Vec<u8>, Error> {
+    let mut out_bytes: Vec<u8> = vec![];
+    let mut compressor = BzEncoder::new(bytes, Compression::best());
+    compressor.read_to_end(&mut out_bytes)?;
+
+    Ok(out_bytes)
+}
+
 type PartiallyReadStream<T> = Chain<Cursor<[u8; 3]>, T>;
 
 /// Detects bz2 compression on the input `stream`. Returns a reader
 /// which includes all data from `stream`.
 ///
 /// # Errors
-/// See [`std::io::Read::read_exact`].
+/// See [`Read::read_exact`].
 pub(crate) fn detect_bz2<T>(mut stream: T) -> Result<(bool, PartiallyReadStream<T>), Error>
 where
     T: for<'a> Read,
diff --git a/src/io.rs b/src/io.rs
@@ -1,36 +1,43 @@
 //! Utility functions for file operations.
 
-use bzip2::{read::BzEncoder, Compression};
+use crate::compression::compress_bz2;
 use std::ffi::OsStr;
 use std::fs::{File, OpenOptions};
-use std::io::{Read, Write};
+use std::io::Write;
 use std::path::Path;
 
 /// Write bytes to file.
 ///
 /// Ordinarily, this function opens the file in `append` mode. If the extension of `outfile` is
-/// `.bz2`, the bytes will be compressed using bzip2 before being written.
+/// `.bz2`, or if `bz2` is `true`, the bytes will be compressed using bzip2 before being written.
 ///
 /// # Errors
-/// If opening the file in append mode is not possible (permissions, path doesn't exist, etc.). See [`std::fs::File::open`].
+/// If opening the file in append mode is not possible (permissions, path doesn't exist, etc.). See [`OpenOptions::open`].
 ///
-/// If an error is encountered when compressing the bytes.
-///
-/// If an error is encountered when writing the bytes to the filesystem. See [`std::io::Write::write_all`]
+/// If an error is encountered when compressing the bytes or writing them to the filesystem. See [`compress_bz2`] and [`Write::write_all`].
 pub(crate) fn bytes_to_file<P: AsRef<Path>>(
     bytes: Vec<u8>,
     outfile: P,
+    bz2: bool,
 ) -> Result<(), std::io::Error> {
-    let mut out_bytes: Vec<u8> = vec![];
     let compress_file: bool =
-        matches!(outfile.as_ref().extension(), Some(ext) if ext == OsStr::new("bz2"));
+        bz2 || matches!(outfile.as_ref().extension(), Some(ext) if ext == OsStr::new("bz2"));
     let mut file: File = OpenOptions::new().append(true).create(true).open(outfile)?;
     if compress_file {
-        let mut compressor = BzEncoder::new(bytes.as_slice(), Compression::best());
-        compressor.read_to_end(&mut out_bytes)?;
+        write_bytes_bz2(bytes, &mut file)
     } else {
-        out_bytes = bytes;
+        file.write_all(&bytes)
     }
+}
 
-    file.write_all(&out_bytes)
+/// Compresses `bytes` with bzip2 (via [`compress_bz2`]) and writes the result to `writer`.
+///
+/// # Errors
+/// Propagates any error from [`compress_bz2`] or [`Write::write_all`].
+pub(crate) fn write_bytes_bz2<W: Write>(
+    bytes: Vec<u8>,
+    writer: &mut W,
+) -> Result<(), std::io::Error> {
+    let out_bytes: Vec<u8> = compress_bz2(&bytes)?;
+    writer.write_all(&out_bytes)
 }
diff --git a/src/lib.rs b/src/lib.rs
@@ -62,7 +62,7 @@
 //!
 //! // Write the records to a file
 //! let out_path = PathBuf::from("tests/test_files/output.rawacf");
-//! RawacfRecord::write_to_file(&rawacf_data, &out_path)?;
+//! RawacfRecord::write_to_file(&rawacf_data, &out_path, false)?;
 //! # std::fs::remove_file(out_path)?;
 //! #    Ok(())
 //! # }
@@ -121,9 +121,10 @@ macro_rules! write_rust {
             pub fn [< try_write_ $type >]<P: AsRef<Path>>(
                 recs: Vec<IndexMap<String, DmapField>>,
                 outfile: P,
+                bz2: bool,
             ) -> Result<(), DmapError> {
                 let bytes = [< $type:camel Record >]::try_into_bytes(recs)?;
-                crate::io::bytes_to_file(bytes, outfile).map_err(DmapError::from)
+                crate::io::bytes_to_file(bytes, outfile, bz2).map_err(DmapError::from)
             }
         }
     }
@@ -308,9 +309,14 @@ read_py!(
 /// does not know that typically `stid` is two bytes.
 #[pyfunction]
 #[pyo3(name = "write_dmap")]
-#[pyo3(text_signature = "(recs: list[dict], outfile: str, /)")]
-fn write_dmap_py(recs: Vec<IndexMap<String, DmapField>>, outfile: PathBuf) -> PyResult<()> {
-    try_write_dmap(recs, &outfile).map_err(PyErr::from)
+#[pyo3(signature = (recs, outfile, /, bz2=false))] // `bz2` is optional, as text_signature advertises
+#[pyo3(text_signature = "(recs: list[dict], outfile: str, /, bz2: bool = False)")]
+fn write_dmap_py(
+    recs: Vec<IndexMap<String, DmapField>>,
+    outfile: PathBuf,
+    bz2: bool,
+) -> PyResult<()> {
+    try_write_dmap(recs, &outfile, bz2).map_err(PyErr::from)
 }
 
 /// Checks that a list of dictionaries contains valid DMAP records, then converts them to bytes.
@@ -321,9 +327,17 @@ fn write_dmap_py(recs: Vec<IndexMap<String, DmapField>>, outfile: PathBuf) -> Py
 /// does not know that typically `stid` is two bytes.
 #[pyfunction]
 #[pyo3(name = "write_dmap_bytes")]
-#[pyo3(text_signature = "(recs: list[dict], /)")]
-fn write_dmap_bytes_py(py: Python, recs: Vec<IndexMap<String, DmapField>>) -> PyResult<Py<PyAny>> {
-    let bytes = DmapRecord::try_into_bytes(recs).map_err(PyErr::from)?;
+#[pyo3(signature = (recs, /, bz2=false))] // `bz2` is optional, as text_signature advertises
+#[pyo3(text_signature = "(recs: list[dict], /, bz2: bool = False)")]
+fn write_dmap_bytes_py(
+    py: Python,
+    recs: Vec<IndexMap<String, DmapField>>,
+    bz2: bool,
+) -> PyResult<Py<PyAny>> {
+    let mut bytes = DmapRecord::try_into_bytes(recs).map_err(PyErr::from)?;
+    if bz2 {
+        bytes = compression::compress_bz2(&bytes).map_err(PyErr::from)?;
+    }
     Ok(PyBytes::new(py, &bytes).into())
 }
 
@@ -334,18 +348,23 @@ macro_rules! write_py {
             #[doc = "Checks that a list of dictionaries contains valid `" $name:upper "` records, then appends to outfile." ]
             #[pyfunction]
             #[pyo3(name = $fn_name)]
-            #[pyo3(text_signature = "(recs: list[dict], outfile: str, /)")]
-            fn [< write_ $name _py >](recs: Vec<IndexMap<String, DmapField>>, outfile: PathBuf) -> PyResult<()> {
-                [< try_write_ $name >](recs, &outfile).map_err(PyErr::from)
+            #[pyo3(signature = (recs, outfile, /, bz2=false))] // `bz2` is optional, as text_signature advertises
+            #[pyo3(text_signature = "(recs: list[dict], outfile: str, /, bz2: bool = False)")]
+            fn [< write_ $name _py >](recs: Vec<IndexMap<String, DmapField>>, outfile: PathBuf, bz2: bool) -> PyResult<()> {
+                [< try_write_ $name >](recs, &outfile, bz2).map_err(PyErr::from)
             }
 
             #[doc = "Checks that a list of dictionaries contains valid `" $name:upper "` records, then converts them to bytes." ]
             #[doc = "Returns `list[bytes]`, one entry per record." ]
             #[pyfunction]
             #[pyo3(name = $bytes_name)]
-            #[pyo3(text_signature = "(recs: list[dict], /)")]
-            fn [< write_ $name _bytes_py >](py: Python, recs: Vec<IndexMap<String, DmapField>>) -> PyResult<Py<PyAny>> {
-                let bytes = [< $name:camel Record >]::try_into_bytes(recs).map_err(PyErr::from)?;
+            #[pyo3(signature = (recs, /, bz2=false))] // `bz2` is optional, as text_signature advertises
+            #[pyo3(text_signature = "(recs: list[dict], /, bz2: bool = False)")]
+            fn [< write_ $name _bytes_py >](py: Python, recs: Vec<IndexMap<String, DmapField>>, bz2: bool) -> PyResult<Py<PyAny>> {
+                let mut bytes = [< $name:camel Record >]::try_into_bytes(recs).map_err(PyErr::from)?;
+                if bz2 {
+                    bytes = compression::compress_bz2(&bytes).map_err(PyErr::from)?;
+                }
                 Ok(PyBytes::new(py, &bytes).into())
             }
         }
diff --git a/src/record.rs b/src/record.rs
diff --git a/tests/tests.rs b/tests/tests.rs