From 9d644aa18ccd1050d677488c43826406be6aa00b Mon Sep 17 00:00:00 2001
From: scaramallion <scaramallion@users.noreply.github.com>
Date: Tue, 6 May 2025 07:52:55 +1000
Subject: [PATCH 1/9] WIP

---
 Cargo.lock     |  40 ++++++++
 Cargo.toml     |   1 +
 README.md      |  46 ++++-----
 pyproject.toml |   2 +-
 rle/utils.py   |  18 +++-
 src/lib.rs     | 253 +++++++++++++++++++++++++++++++++++++++----------
 6 files changed, 282 insertions(+), 78 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 77d35d4..fcc00f6 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -8,12 +8,30 @@ version = "1.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
 
+[[package]]
+name = "bitvec"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c"
+dependencies = [
+ "funty",
+ "radium",
+ "tap",
+ "wyz",
+]
+
 [[package]]
 name = "cfg-if"
 version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 
+[[package]]
+name = "funty"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
+
 [[package]]
 name = "heck"
 version = "0.5.0"
@@ -66,6 +84,7 @@ dependencies = [
 name = "pylibjpeg-rle"
 version = "1.1.0"
 dependencies = [
+ "bitvec",
  "pyo3",
 ]
 
@@ -141,6 +160,12 @@ dependencies = [
  "proc-macro2",
 ]
 
+[[package]]
+name = "radium"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09"
+
 [[package]]
 name = "syn"
 version = "2.0.101"
@@ -152,6 +177,12 @@ dependencies = [
  "unicode-ident",
 ]
 
+[[package]]
+name = "tap"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
+
 [[package]]
 name = "target-lexicon"
 version = "0.13.2"
@@ -169,3 +200,12 @@ name = "unindent"
 version = "0.2.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3"
+
+[[package]]
+name = "wyz"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed"
+dependencies = [
+ "tap",
+]
diff --git a/Cargo.toml b/Cargo.toml
index a78c8e7..bec581f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,3 +12,4 @@ crate-type = ["cdylib"]
 
 [dependencies]
 pyo3 = { version = "0.24.2", features = ["extension-module"] }
+bitvec = { version = "1.0.1" }
diff --git a/README.md b/README.md
index 6f0f08a..4782f7b 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@
 
 ## pylibjpeg-rle
 
-A fast DICOM ([PackBits](https://en.wikipedia.org/wiki/PackBits)) RLE plugin for [pylibjpeg](https://github.com/pydicom/pylibjpeg), written in Rust with a Python 3.7+ wrapper.
+A fast DICOM ([PackBits](https://en.wikipedia.org/wiki/PackBits)) RLE plugin for [pylibjpeg](https://github.com/pydicom/pylibjpeg), written in Rust with a Python wrapper.
 
 Linux, MacOS and Windows are all supported.
 
@@ -85,32 +85,32 @@ ds.save_as('as_rle.dcm')
 ### Benchmarks
 #### Decoding
 
-Time per 1000 decodes, pydicom's default RLE handler vs. pylibjpeg-rle
+Time per 1000 decodes, pydicom's default RLE decoder vs. pylibjpeg-rle:
 
 | Dataset                     | Pixels  | Bytes   | pydicom | pylibjpeg-rle |
 | ---                         | ---     | ---     | ---     | ---           |
-| OBXXXX1A_rle.dcm            | 480,000 | 480,000 | 4.89 s  |        0.79 s |
-| OBXXXX1A_rle_2frame.dcm     | 960,000 | 960,000 | 9.89 s  |        1.65 s |
-| SC_rgb_rle.dcm              |  10,000 |  30,000 | 0.20 s  |        0.15 s |
-| SC_rgb_rle_2frame.dcm       |  20,000 |  60,000 | 0.32 s  |        0.18 s |
-| MR_small_RLE.dcm            |   4,096 |   8,192 | 0.35 s  |        0.13 s |
-| emri_small_RLE.dcm          |  40,960 |  81,920 | 1.13 s  |        0.28 s |
-| SC_rgb_rle_16bit.dcm        |  10,000 |  60,000 | 0.33 s  |        0.17 s |
-| SC_rgb_rle_16bit_2frame.dcm |  20,000 | 120,000 | 0.56 s  |        0.21 s |
-| rtdose_rle_1frame.dcm       |     100 |     400 | 0.12 s  |        0.13 s |
-| rtdose_rle.dcm              |   1,500 |   6,000 | 0.53 s  |        0.26 s |
-| SC_rgb_rle_32bit.dcm        |  10,000 | 120,000 | 0.56 s  |        0.19 s |
-| SC_rgb_rle_32bit_2frame.dcm |  20,000 | 240,000 | 1.03 s  |        0.28 s |
+| OBXXXX1A_rle.dcm            | 480,000 | 480,000 |  5.7 s  |        1.1 s  |
+| OBXXXX1A_rle_2frame.dcm     | 960,000 | 960,000 | 11.5 s  |        2.1 s  |
+| SC_rgb_rle.dcm              |  10,000 |  30,000 | 0.28 s  |        0.19 s |
+| SC_rgb_rle_2frame.dcm       |  20,000 |  60,000 | 0.45 s  |        0.28 s |
+| MR_small_RLE.dcm            |   4,096 |   8,192 | 0.46 s  |        0.15 s |
+| emri_small_RLE.dcm          |  40,960 |  81,920 | 1.8 s   |        0.67 s |
+| SC_rgb_rle_16bit.dcm        |  10,000 |  60,000 | 0.48 s  |        0.25 s |
+| SC_rgb_rle_16bit_2frame.dcm |  20,000 | 120,000 | 0.86 s  |        0.39 s |
+| rtdose_rle_1frame.dcm       |     100 |     400 | 0.16 s  |        0.13 s |
+| rtdose_rle.dcm              |   1,500 |   6,000 | 1.0 s   |        0.64 s |
+| SC_rgb_rle_32bit.dcm        |  10,000 | 120,000 | 0.82 s  |        0.35 s |
+| SC_rgb_rle_32bit_2frame.dcm |  20,000 | 240,000 | 1.5 s   |        0.60 s |
 
 #### Encoding
 
-Time per 1000 encodes, pydicom's default RLE handler vs. pylibjpeg-rle
+Time per 1000 encodes, pydicom's default RLE encoder vs. pylibjpeg-rle and [python-gdcm](https://github.com/tfmoraes/python-gdcm):
 
-| Dataset            | Pixels  | Bytes   | pydicom | pylibjpeg-rle |
-| ---                | ---     | ---     | ---     | ---           |
-| OBXXXX1A.dcm       | 480,000 | 480,000 | 30.7 s  |       1.36 s  |
-| SC_rgb.dcm         |  10,000 |  30,000 | 1.80 s  |       0.09 s  |
-| MR_small.dcm       |   4,096 |   8,192 | 2.29 s  |       0.04 s  |
-| SC_rgb_16bit.dcm   |  10,000 |  60,000 | 3.57 s  |       0.17 s  |
-| rtdose_1frame.dcm  |     100 |     400 | 0.19 s  |       0.003 s |
-| SC_rgb_32bit.dcm   |  10,000 | 120,000 | 7.20 s  |       0.33 s  |
+| Dataset            | Pixels  | Bytes   | pydicom | pylibjpeg-rle | python-gdcm |
+| ---                | ---     | ---     | ---     | ---           | ---         |
+| OBXXXX1A.dcm       | 480,000 | 480,000 | 30.6 s  |       1.4 s   | 1.5 s       |
+| SC_rgb.dcm         |  10,000 |  30,000 |  1.9 s  |       0.11 s  | 0.21 s      |
+| MR_small.dcm       |   4,096 |   8,192 |  3.0 s  |       0.11 s  | 0.29 s      |
+| SC_rgb_16bit.dcm   |  10,000 |  60,000 |  3.6 s  |       0.18 s  | 0.28 s      |
+| rtdose_1frame.dcm  |     100 |     400 | 0.28 s  |       0.04 s  | 0.14 s      |
+| SC_rgb_32bit.dcm   |  10,000 | 120,000 |  7.1 s  |       0.32 s  | 0.43 s      |
diff --git a/pyproject.toml b/pyproject.toml
index 859c0af..e79a7b6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,7 +32,7 @@ keywords = ["dicom pydicom python rle pylibjpeg rust"]
 license = "MIT"
 name = "pylibjpeg-rle"
 readme = "README.md"
-version = "2.1.0"
+version = "2.2.0.dev0"
 requires-python = ">=3.10"
 dependencies = [
   "numpy>=2.0",
diff --git a/rle/utils.py b/rle/utils.py
index 63c78ef..c9e491f 100644
--- a/rle/utils.py
+++ b/rle/utils.py
@@ -106,6 +106,10 @@ def encode_array(
 
     .. versionadded:: 1.1
 
+    .. versionchanged:: 2.2
+
+        Added support for 'bits_allocated' 1.
+
     Parameters
     ----------
     arr : numpy.ndarray
@@ -124,7 +128,7 @@ def encode_array(
         * ``samples_per_px': int`` the number of samples per pixel, either
           1 for monochrome or 3 for RGB or similar data.
         * ``'bits_per_px': int`` the number of bits needed to contain each
-          pixel, either 8, 16, 32 or 64.
+          pixel, either 1, 8, 16, 32 or 64.
         * ``'nr_frames': int`` the number of frames in `arr`, required if
           more than one frame is present.
 
@@ -169,6 +173,10 @@ def encode_pixel_data(
         to 15 in order to meet the requirements of the *RLE Lossless*
         transfer syntax.
 
+    .. versionchanged:: 2.2
+
+        Added support for 'bits_allocated' 1.
+
     Parameters
     ----------
     src : bytes
@@ -189,7 +197,7 @@ def encode_pixel_data(
         * ``samples_per_pixel': int`` the number of samples per pixel, either
           1 for monochrome or 3 for RGB or similar data.
         * ``'bits_allocated': int`` the number of bits needed to contain each
-          pixel, either 8, 16, 32 or 64.
+          pixel, either 1, 8, 16, 32 or 64.
 
     Returns
     -------
@@ -212,9 +220,9 @@ def encode_pixel_data(
         msg = "(0028,0002) 'Samples per Pixel'" if ds else "'samples_per_pixel'"
         raise ValueError(f"{msg} must be 1 or 3")
 
-    if bpp not in [8, 16, 32, 64]:
+    if bpp not in [1, 8, 16, 32, 64]:
         msg = "(0028,0100) 'Bits Allocated'" if ds else "'bits_allocated'"
-        raise ValueError(f"{msg} must be 8, 16, 32 or 64")
+        raise ValueError(f"{msg} must be 1, 8, 16, 32 or 64")
 
     if bpp / 8 * spp > 15:
         raise ValueError(
@@ -235,6 +243,7 @@ def encode_pixel_data(
     return cast(bytes, encode_frame(src, r, c, spp, bpp, byteorder))
 
 
+# TODO: unpack Bits Stored 1
 def generate_frames(ds: "Dataset", reshape: bool = True) -> Iterator[np.ndarray]:
     """Yield a *Pixel Data* frame from `ds` as an :class:`~numpy.ndarray`.
 
@@ -310,6 +319,7 @@ def generate_frames(ds: "Dataset", reshape: bool = True) -> Iterator[np.ndarray]
             yield arr.transpose(1, 2, 0)
 
 
+# TODO: unpack Bits Stored 1
 def pixel_array(ds: "Dataset") -> "np.ndarray":
     """Return the entire *Pixel Data* as an :class:`~numpy.ndarray`.
 
diff --git a/src/lib.rs b/src/lib.rs
index ecd05d6..b5f5c74 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -94,7 +94,7 @@ fn decode_frame<'py>(
         The total number of pixels in the frame (rows x columns),
         maximum (2^32 - 1).
     bpp : int
-        The number of bits per pixel, supported values 8, 16, 32, 64.
+        The number of bits per pixel, supported values 1, 8, 16, 32, 64.
     byteorder : str
         The byte order of the returned data, '<' for little endian, '>' for
         big endian.
@@ -123,7 +123,7 @@ fn _decode_frame(
     nr_pixels
         The total number of pixels in the frame (rows x columns).
     bpp
-        The number of bits per pixel, should be a multiple of 8 and no larger
+        The number of bits per pixel, should be 1 or a multiple of 8 and no larger
         than 64.
     byteorder
         The byte order of the decoded data, '<' for little endian, '>' for
@@ -133,7 +133,7 @@ fn _decode_frame(
     // Pre-define our errors for neatness
     let err_invalid_bits_allocated = Err(
         String::from(
-            "The (0028,0100) 'Bits Allocated' value must be 8, 16, 32 or 64"
+            "The (0028,0100) 'Bits Allocated' value must be 1, 8, 16, 32 or 64"
         ).into()
     );
     let err_invalid_offset = Err(
@@ -154,24 +154,31 @@ fn _decode_frame(
         String::from("'byteorder' must be '>' or '<'").into()
     );
 
-    // Ensure we have a valid bits/px value
+    let mut is_bit_packed: bool = false;
+
+    // Ensure we have a valid bits/px value: 1 or a multiple of 8
     match bpp {
         0 => return err_invalid_bits_allocated,
+        1 => { is_bit_packed = true; },
         _ => match bpp % 8 {
-            0 => {},
+            0 => { },
             _ => return err_invalid_bits_allocated
         }
     }
 
-    // Ensure `bytes_per_pixel` is in [1, 2, 4, 8]
-    let bytes_per_pixel: u8 = bpp / 8;
-    match bytes_per_pixel {
-        1 => {},
-        2 | 4 | 8 => match byteorder {
-            '>' | '<' => {},
-            _ => return err_invalid_byteorder
-        },
-        _ => return err_invalid_bits_allocated
+    // For non-bit-packed data ensure `bytes_per_pixel` is in [1, 2, 4, 8]
+    if !is_bit_packed {
+        let bytes_per_pixel: u8 = bpp / 8;
+        match bytes_per_pixel {
+            1 => {},
+            2 | 4 | 8 => match byteorder {
+                '>' | '<' => {},
+                _ => return err_invalid_byteorder
+            },
+            _ => return err_invalid_bits_allocated
+        }
+    } else {
+
     }
 
     // Parse the RLE header and check results
@@ -213,14 +220,6 @@ fn _decode_frame(
         _ => return err_invalid_nr_samples
     }
 
-    // Watch for overflow here; u32 * u32 -> u64
-    let expected_length = usize::try_from(
-        nr_pixels * u32::from(bytes_per_pixel * spp)
-    ).unwrap();
-
-    // Pre-allocate a vector for the decoded frame
-    let mut frame = vec![0u8; expected_length];
-
     /*
     Example
     -------
@@ -238,47 +237,201 @@ fn _decode_frame(
       MSB LSB MSB LSB ... MSB LSB | MSB LSB MSB LSB ... MSB LSB | ...
     */
 
+    /*
+    Bit-packed data
+    ---------------
+
+    For bit-packed data (i.e. a *Bits Allocated* of 1), we decode the RLE encoding but
+    leave the bit-packing in place. This means the length of each decoded segment is not
+    equal to the number of pixels but instead the number of pixels / 8, rounded up to the
+    nearest whole integer with the difference being accounted for by padding 0b0 bits.
+
+    Each decoded segment must have these padding bits removed and the final frame should be
+    the concatenated unpadded segments. Fortunately for bit-packed data there can be at
+    most 3 segments (with a *Samples per Pixel* of 3).
+
+    Might be a good idea to use bool arrays instead of u8 and implicitly unpack to avoid
+    annoyances with multi-segment padding, then convert to u8 at the end.
+
+    bitvec perhaps?
+    */
+
     // Decode each segment and place it into the vector
     // ------------------------------------------------
     let pps = usize::try_from(nr_pixels).unwrap();
     // Concatenate sample planes into a frame
-    for sample in 0..spp {  // 0 or (0, 1, 2)
-        // Sample offset
-        let so = usize::from(sample * bytes_per_pixel) * pps;
-
-        // Interleave the segments into a sample plane
-        for byte_offset in 0..bytes_per_pixel {  // 0, [1, 2, 3, ..., 7]
-            // idx should be in range [0, 15]
-            let idx: usize;
-            if byteorder == '>' { // big-endian
-                // e.g. 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
-                idx = usize::from(sample * bytes_per_pixel + byte_offset);
-            } else { // little-endian
-                // e.g. 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8
-                idx = usize::from(
-                    bytes_per_pixel - byte_offset + bytes_per_pixel * sample
-                ) - 1;
+    if !is_bit_packed {
+        // Watch for overflow here; u32 * u32 -> u64
+        // Actual values are (u16 * u16) * u8
+        let expected_length = usize::try_from(
+            nr_pixels * u32::from(bytes_per_pixel * spp)
+        ).unwrap();
+
+        // Pre-allocate a vector for the decoded frame
+        let mut frame = vec![0u8; expected_length];
+
+        for sample in 0..spp {  // 0 or (0, 1, 2)
+            // Sample offset
+            let so = usize::from(sample * bytes_per_pixel) * pps;
+
+            // Interleave the segments into a sample plane
+            for byte_offset in 0..bytes_per_pixel {  // 0, [1, 2, 3, ..., 7]
+                // idx should be in range [0, 15]
+                let idx: usize;
+                if byteorder == '>' { // big-endian
+                    // e.g. 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
+                    idx = usize::from(sample * bytes_per_pixel + byte_offset);
+                } else { // little-endian
+                    // e.g. 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8
+                    idx = usize::from(
+                        bytes_per_pixel - byte_offset + bytes_per_pixel * sample
+                    ) - 1;
+                }
+
+                // offsets[idx] is u32 -> usize not guaranteed
+                let start = usize::try_from(offsets[idx]).unwrap();
+                let end = usize::try_from(offsets[idx + 1]).unwrap();
+
+                // Decode the segment into the frame
+                let len = _decode_segment_into_frame(
+                    <&[u8]>::try_from(&src[start..end]).unwrap(),
+                    &mut frame,
+                    usize::from(bytes_per_pixel),
+                    usize::from(byte_offset) + so
+                )?;
+                if len != pps { return err_segment_length }
             }
+        }
+
+        return Ok(frame);
+
+    // Bit-packed data
+    // u8 * u32
+    // FIXME
+    let bits_per_segment = usize::try_from(8 * nr_pixels).unwrap();  // wrong!
+    // Each segment is padded to an even length, so segment boundaries should be
+    // whole integers
+
+    // The number of whole bytes per segment, including any padding
+    let mut padding_bytes = 0;
+    match bits_per_segment % 16 {
+        0 => {},
+        1..=8 => { padding_bytes = 2 },
+        9..=15 => { padding_bytes = 1 },
+    }
+    let bytes_per_segment = bits_per_segment / 8 + padding_bytes;
 
-            // offsets[idx] is u32 -> usize not guaranteed
-            let start = usize::try_from(offsets[idx]).unwrap();
-            let end = usize::try_from(offsets[idx + 1]).unwrap();
-
-            // Decode the segment into the frame
-            let len = _decode_segment_into_frame(
-                <&[u8]>::try_from(&src[start..end]).unwrap(),
-                &mut frame,
-                usize::from(bytes_per_pixel),
-                usize::from(byte_offset) + so
-            )?;
-            if len != pps { return err_segment_length }
+    let mut frame: Vec<u8> = Vec::new();
+    for sample in 0..spp {  // 0..0 or 0..2
+        // Start and end indices of the segment
+        let start = usize::try_from(offsets[idx]).unwrap();
+        let end = usize::try_from(offsets[idx + 1]).unwrap();
+
+        // Decode the segment into the frame
+        let segment = _decode_segment_into_frame(
+            <&[u8]>::try_from(&src[start..end]).unwrap(),
+            bytes_per_segment,
+        )?;
+        if segment.len() != bytes_per_segment { return err_segment_length }
+
+        if spp == 0 { return Ok(segment) }
+
+        if !is_padded {
+            frame.extend(segment);
+        } else {
+            // hmm
         }
+
     }
 
+    // Concatenate the segments together, bit shifting to ensure any padding is removed
+
     Ok(frame)
 }
 
 
+/*
+fn _shift_bit_packed_segment(
+    prefix: u8, src: Vec<u8>, shift: u8
+) -> Vec<u8> {
+    assert!(0 < shift < 8);
+
+    let mut dst = Vec::new();
+    // shift the prefix to the right by the required shift
+    dst.push(prefix >> (8 - shift))
+
+    for idx in 0..src.len() {
+        dst[idx] = src[idx] << shift;
+        dst[idx] |= src[idx + 1] >> (8 - shift);
+    }
+
+    dst
+}
+*/
+
+
+fn _decode_bit_packed_segment(
+    src: &[u8], segment_length: usize
+) -> Result<Vec<u8>, Box<dyn Error>> {
+    //
+    let eod = src.len() - 1;
+    let mut pos = 0;
+    let mut header_byte: usize;
+    let mut dst: Vec<u8> = Vec::new();
+
+    let err_eod = Err(
+        String::from(
+            "The end of the data was reached before the segment was \
+            completely decoded"
+        ).into()
+    );
+
+    loop {
+        // `header_byte` is equivalent to N in the DICOM Standard
+        // usize is at least u8
+        header_byte = usize::from(src[pos]);
+        pos += 1;
+        if header_byte > 128 {
+            // Extend by copying the next byte (-N + 1) times
+            // however since using uint8 instead of int8 this will be
+            // (256 - N + 1) times
+            op_len = 257 - header_byte;
+
+            // Check we have enough encoded data
+            if pos > eod { return err_eod }
+
+            // Check segment for excess padding
+            if (idx + op_len) > segment_length {
+                dst.extend([src[pos]; segment_length - idx]);
+
+                return Ok(dst)
+            }
+
+            dst.extend([src[pos]; op_len]);
+            pos += 1;
+        } else if header_byte < 128 {
+            // Extend by literally copying the next (N + 1) bytes
+            op_len = header_byte + 1;
+
+            // Check we have enough encoded data
+            if (pos + header_byte) > eod { return err_eod }
+
+            // Check segment for excess padding
+            if (idx + op_len) > segment_length {
+                dst.extend(src[pos..pos + segment_length - idx]);
+
+                return Ok(dst)
+            }
+
+            dst.extend(src[pos..pos + op_len]);
+            pos += header_byte + 1;
+        } // header_byte == 128 is noop
+
+        if pos >= eod { return Ok(dst) }
+    }
+}
+
+
 fn _decode_segment_into_frame(
     src: &[u8], dst: &mut Vec<u8>, bpp: usize, initial_offset: usize
 ) -> Result<usize, Box<dyn Error>> {

From a9bb303cfcb1369c38d23c18cc70682f78ff4c3d Mon Sep 17 00:00:00 2001
From: scaramallion <scaramallion@users.noreply.github.com>
Date: Fri, 9 May 2025 15:55:37 +1000
Subject: [PATCH 2/9] WIP2

---
 rle/tests/test_decode.py | 116 +++++++++++++++++++++++++-
 rle/tests/test_utils.py  |   2 +-
 src/lib.rs               | 170 +++++++++++++++++++++------------------
 3 files changed, 203 insertions(+), 85 deletions(-)

diff --git a/rle/tests/test_decode.py b/rle/tests/test_decode.py
index 36df11e..13247dd 100644
--- a/rle/tests/test_decode.py
+++ b/rle/tests/test_decode.py
@@ -69,19 +69,19 @@ def as_bytes(self, offsets):
 
     def test_bits_allocated_zero_raises(self):
         """Test exception raised for BitsAllocated 0."""
-        msg = r"The \(0028,0100\) 'Bits Allocated' value must be 8, 16, 32 or 64"
+        msg = r"The \(0028,0100\) 'Bits Allocated' value must be 1, 8, 16, 32 or 64"
         with pytest.raises(ValueError, match=msg):
             decode_frame(b"\x00\x00\x00\x00", 1, 0, "<")
 
     def test_bits_allocated_not_octal_raises(self):
-        """Test exception raised for BitsAllocated not a multiple of 8."""
-        msg = r"The \(0028,0100\) 'Bits Allocated' value must be 8, 16, 32 or 64"
+        """Test exception raised for BitsAllocated not 1 or a multiple of 8."""
+        msg = r"The \(0028,0100\) 'Bits Allocated' value must be 1, 8, 16, 32 or 64"
         with pytest.raises(ValueError, match=msg):
             decode_frame(b"\x00\x00\x00\x00", 1, 12, "<")
 
     def test_bits_allocated_large_raises(self):
         """Test exception raised for BitsAllocated greater than 64."""
-        msg = r"The \(0028,0100\) 'Bits Allocated' value must be 8, 16, 32 or 64"
+        msg = r"The \(0028,0100\) 'Bits Allocated' value must be 1, 8, 16, 32 or 64"
         with pytest.raises(ValueError, match=msg):
             decode_frame(b"\x00\x00\x00\x00", 1, 72, "<")
 
@@ -357,6 +357,114 @@ def test_u32_3s(self):
         assert [4294967295, 16777216, 65536, 256, 1, 0] == arr[6:12].tolist()
         assert [1, 16777216, 65536, 256, 1, 4294967294] == arr[12:].tolist()
 
+    def test_u8_1s_bs1(self):
+        """Test decoding bit packed 1 sample/px."""
+        header = b"\x01\x00\x00\x00\x40\x00\x00\x00"
+        header += (64 - len(header)) * b"\x00"
+        # 0, 64, 128, 160, 192, 255
+        data = b"\x05\x00\x40\x80\xA0\xC0\xFF"
+        # Big endian
+        # 48 px - byte aligned in 6 bytes
+        decoded = decode_frame(header + data, 6 * 8, 1, ">")
+        arr = np.frombuffer(decoded, np.dtype("uint8"))
+        assert [0, 64, 128, 160, 192, 255] == arr.tolist()
+
+        # 37 px -> 5 bytes
+        decoded = decode_frame(header + data, 4 * 8 + 5, 1, ">")
+        arr = np.frombuffer(decoded, np.dtype("uint8"))
+        assert [0, 64, 128, 160, 192] == arr.tolist()
+
+        # 2 px -> 1 byte
+        decoded = decode_frame(header + data, 2, 1, ">")
+        arr = np.frombuffer(decoded, np.dtype("uint8"))
+        assert [0] == arr.tolist()
+
+        # Little-endian
+        decoded = decode_frame(header + data, 6 * 8, 1, "<")
+        arr = np.frombuffer(decoded, np.dtype("uint8"))
+        assert [0, 64, 128, 160, 192, 255] == arr.tolist()
+
+        decoded = decode_frame(header + data, 4 * 8 + 5, 1, "<")
+        arr = np.frombuffer(decoded, np.dtype("uint8"))
+        assert [0, 64, 128, 160, 192] == arr.tolist()
+
+        decoded = decode_frame(header + data, 2, 1, "<")
+        arr = np.frombuffer(decoded, np.dtype("uint8"))
+        assert [0] == arr.tolist()
+
+    def test_u8_3s_bs1(self):
+        """Test decoding bit packed 3 sample/px."""
+        header = (
+            b"\x03\x00\x00\x00"  # 3 segments
+            b"\x40\x00\x00\x00"  # 64
+            b"\x47\x00\x00\x00"  # 71
+            b"\x4E\x00\x00\x00"  # 78
+        )
+        header += (64 - len(header)) * b"\x00"
+        # 0, 64, 128, 160, 192, 255
+        data = (
+            b"\x05\x00\x40\x80\xA0\xC0\xFF"  # R
+            b"\x05\x7F\xC0\x80\x40\x00\xFF"  # B
+            b"\x05\x01\x40\x80\xA0\xC0\xFE"  # G
+        )
+        # 48 px - byte aligned in 18 bytes
+        decoded = decode_frame(header + data, 6 * 8, 1, "<")
+        arr = np.frombuffer(decoded, np.dtype("uint8"))
+        # Ordered all R, all G, all B
+        assert [0, 64, 128, 160, 192, 255] == arr[:6].tolist()
+        assert [127, 192, 128, 64, 0, 255] == arr[6:12].tolist()
+        assert [1, 64, 128, 160, 192, 254] == arr[12:].tolist()
+
+        # 47 px - non-byte aligned in 18 bytes
+        decoded = decode_frame(header + data, 6 * 8 - 1, 1, "<")
+        arr = np.frombuffer(decoded, np.dtype("uint8"))
+        # Boundaries are 47 | 94 | 141
+        #                       v removed
+        # 255 | 127: 0b[1111_111x 0b0]111_1111 -> 0b1111_1110
+        assert [0, 64, 128, 160, 192, 254] == arr[:6].tolist()
+        # Left shift values by 1 bit
+        # 127 | 192: 0b0[111_1111 0b1]100_0000 -> 0b1111_1111
+        # 192 | 128: 0b1[100_0000 0b1]000_0000 -> 0b1000_0001
+        # 128 |  64: 0b1[000_0000 0b0]100_0000 -> 0b0000_0000
+        #  64 |   0: 0b0[100_0000 0b0]000_0000 -> 0b1000_0000
+        #   0 | 255: 0b0[000_0000 0b1]111_1111 -> 0b0000_0001
+        #                       v removed
+        # 255 |   1: 0b1[111_111x 0b00]00_0001 -> 0b1111_1100
+        assert [255, 129, 0, 128, 1, 252] == arr[6:12].tolist()
+        # Left shift values by 2 bits
+        #   1 |  64: 0b00[00_0001 0b01]00_0000 -> 0b0000_0101
+        #  64 | 128: 0b01[00_0000 0b10]00_0000 -> 0b0000_0010
+        # 128 | 160: 0b10[00_0000 0b10]10_0000 -> 0b0000_0010
+        # 160 | 192: 0b10[10_0000 0b11]00_0000 -> 0b1000_0011
+        # 192 | 254: 0b11[00_0000 0b11]11_1110 -> 0b0000_0011
+        #                       v removed
+        # 254 |   x: 0b11[11_111x             -> 0b1111_1000
+        assert [5, 2, 2, 131, 3, 248] == arr[12:].tolist()
+
+        # 41 px - non-byte aligned in 16 bytes
+        decoded = decode_frame(header + data, 5 * 8 + 1, 1, "<")
+        arr = np.frombuffer(decoded, np.dtype("uint8"))
+        # Boundaries are 48 | 96 | 144 -> 41 | 82 | 123
+        #                vvv vvvv removed
+        # 255 | 255: 0b[1xxx_xxxx 0b0111_111]1 -> 0b1011_1111
+        assert [0, 64, 128, 160, 192, 191] == arr[:6].tolist()
+        # Left shift values by 7 bits
+        # 255 | 192: 0b1111_111[1 0b1100_000]0 -> 0b1110_0000
+        # 192 | 128: 0b1100_000[0 0b1000_000]0 -> 0b0100_0000
+        # 128 |  64: 0b1000_000[0 0b0100_000]0 -> 0b0010_0000
+        #  64 |   0: 0b0100_000[0 0b0000_000]0 -> 0b0000_0000
+        #                                  vvv vvvv removed
+        #   0 | 255 |   1: 0b0000_000[0 0b1xxx_xxxx 0b0000_00]01 -> 0b0100_0000
+        assert [224, 64, 32, 0, 64] == arr[6:11].tolist()
+        # Left shift values by 14 bits
+        #   1 |  64: 0b0000_00[01 0b0100_00]00 -> 0b0101_0000
+        #  64 | 128: 0b0100_00[00 0b1000_00]00 -> 0b0010_0000
+        # 128 | 160: 0b1000_00[00 0b1010_00]00 -> 0b0010_1000
+        # 160 | 192: 0b1010_00[00 0b1100_00]00 -> 0b0011_0000
+        #                           vvv vvvv removed
+        # 192 | 254: 0b1100_[00 0b1]xxx_xxxx   -> 0b0010_0000
+        assert [80, 32, 40, 48, 32] == arr[11:].tolist()
+
 
 @pytest.mark.skipif(not HAVE_PYDICOM, reason="No pydicom")
 class TestDecodeFrame_Datasets:
diff --git a/rle/tests/test_utils.py b/rle/tests/test_utils.py
index 5371d88..87c66ee 100644
--- a/rle/tests/test_utils.py
+++ b/rle/tests/test_utils.py
@@ -199,7 +199,7 @@ def test_bad_bits_allocated_raises(self):
             "byteorder": "<",
         }
 
-        msg = r"'bits_allocated' must be 8, 16, 32 or 64"
+        msg = r"'bits_allocated' must be 1, 8, 16, 32 or 64"
         with pytest.raises(ValueError, match=msg):
             encode_pixel_data(b"", **kwargs)
 
diff --git a/src/lib.rs b/src/lib.rs
index b5f5c74..d97e4a5 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,6 +2,8 @@
 use std::convert::TryFrom;
 use std::error::Error;
 
+use bitvec::prelude::*;
+
 use pyo3::prelude::*;
 use pyo3::wrap_pyfunction;
 use pyo3::types::{PyBytes, PyByteArray};
@@ -154,31 +156,32 @@ fn _decode_frame(
         String::from("'byteorder' must be '>' or '<'").into()
     );
 
-    let mut is_bit_packed: bool = false;
-
-    // Ensure we have a valid bits/px value: 1 or a multiple of 8
+    // Check 'Bits Allocated' is 1 or a multiple of 8
+    let mut is_bit_packed = false;
+    let mut bytes_per_pixel: u8;
     match bpp {
         0 => return err_invalid_bits_allocated,
-        1 => { is_bit_packed = true; },
+        1 => {
+                is_bit_packed = true;
+                bytes_per_pixel = 1;
+        },
         _ => match bpp % 8 {
-            0 => { },
-            _ => return err_invalid_bits_allocated
-        }
-    }
-
-    // For non-bit-packed data ensure `bytes_per_pixel` is in [1, 2, 4, 8]
-    if !is_bit_packed {
-        let bytes_per_pixel: u8 = bpp / 8;
-        match bytes_per_pixel {
-            1 => {},
-            2 | 4 | 8 => match byteorder {
-                '>' | '<' => {},
-                _ => return err_invalid_byteorder
+            0 => {
+                bytes_per_pixel = bpp / 8;
             },
             _ => return err_invalid_bits_allocated
         }
-    } else {
+    }
 
+    // Check `byteorder` is a valid character
+    // Check *Bits Allocated* is in [1, 8, 16, 32, 64]
+    match bytes_per_pixel {
+        1 => {},
+        2 | 4 | 8 => match byteorder {
+            '>' | '<' => {},
+            _ => return err_invalid_byteorder
+        },
+        _ => return err_invalid_bits_allocated
     }
 
     // Parse the RLE header and check results
@@ -187,6 +190,8 @@ fn _decode_frame(
     let encoded_length = src.len();
     if encoded_length < 64 { return err_insufficient_data }
 
+    // Don't need to check the unwrap as we just checked
+    //   there's enough data in `src`
     let header = <&[u8; 64]>::try_from(&src[0..64]).unwrap();
     let all_offsets: [u32; 15] = _parse_header(header);
 
@@ -252,19 +257,18 @@ fn _decode_frame(
 
     Might be a good idea to use bool arrays instead of u8 and implicitly unpack to avoid
     annoyances with multi-segment padding, then convert to u8 at the end.
-
-    bitvec perhaps?
     */
 
     // Decode each segment and place it into the vector
     // ------------------------------------------------
+    // TODO: handle unwrap
     let pps = usize::try_from(nr_pixels).unwrap();
     // Concatenate sample planes into a frame
     if !is_bit_packed {
         // Watch for overflow here; u32 * u32 -> u64
         // Actual values are (u16 * u16) * u8
         let expected_length = usize::try_from(
-            nr_pixels * u32::from(bytes_per_pixel * spp)
+            nr_pixels * nr_segments
         ).unwrap();
 
         // Pre-allocate a vector for the decoded frame
@@ -303,81 +307,69 @@ fn _decode_frame(
             }
         }
 
-        return Ok(frame);
+        return Ok(frame)
+    }
 
     // Bit-packed data
-    // u8 * u32
-    // FIXME
-    let bits_per_segment = usize::try_from(8 * nr_pixels).unwrap();  // wrong!
-    // Each segment is padded to an even length, so segment boundaries should be
-    // whole integers
-
-    // The number of whole bytes per segment, including any padding
-    let mut padding_bytes = 0;
-    match bits_per_segment % 16 {
+    // The number of whole bytes per segment after decoding
+    // TODO: handle unwraps
+    let mut bytes_per_segment = usize::try_from(nr_pixels / 8).unwrap();
+    match nr_pixels % 8 {
         0 => {},
-        1..=8 => { padding_bytes = 2 },
-        9..=15 => { padding_bytes = 1 },
+        _ => { bytes_per_segment += 1); },
     }
-    let bytes_per_segment = bits_per_segment / 8 + padding_bytes;
 
     let mut frame: Vec<u8> = Vec::new();
-    for sample in 0..spp {  // 0..0 or 0..2
+    for idx in 0..(offsets.len() - 1) {  // 0..1 or 0..3
         // Start and end indices of the segment
         let start = usize::try_from(offsets[idx]).unwrap();
         let end = usize::try_from(offsets[idx + 1]).unwrap();
-
-        // Decode the segment into the frame
-        let segment = _decode_segment_into_frame(
+        // TODO: match here on the unwrap
+        let segment = _decode_bit_packed_segment(
             <&[u8]>::try_from(&src[start..end]).unwrap(),
-            bytes_per_segment,
+            usize::from(bytes_per_segment),
         )?;
-        if segment.len() != bytes_per_segment { return err_segment_length }
 
-        if spp == 0 { return Ok(segment) }
+        if segment.len() != bytes_per_segment { return err_segment_length }
 
-        if !is_padded {
-            frame.extend(segment);
-        } else {
-            // hmm
-        }
+        if spp == 1 { return Ok(segment) }
 
+        frame.extend(segment);
     }
 
-    // Concatenate the segments together, bit shifting to ensure any padding is removed
-
-    Ok(frame)
-}
-
-
-/*
-fn _shift_bit_packed_segment(
-    prefix: u8, src: Vec<u8>, shift: u8
-) -> Vec<u8> {
-    assert!(0 < shift < 8);
-
-    let mut dst = Vec::new();
-    // shift the prefix to the right by the required shift
-    dst.push(prefix >> (8 - shift))
-
-    for idx in 0..src.len() {
-        dst[idx] = src[idx] << shift;
-        dst[idx] |= src[idx + 1] >> (8 - shift);
+    // Multiple samples but byte aligned
+    if nr_pixels % 8 == 0 { return Ok(frame) }
+
+    // Multiple samples but not byte aligned
+    //  -> need to remove the bit padding between segments
+    // TODO: handle unwraps
+    let mut bv = BitVec::<_, Msb0>::from_vec(frame);
+    let step = usize::try_from(nr_pixels).unwrap();
+    for start in (step..bv.len() - 1).step_by(step) {
+        let end = start + (8 - usize::try_from(nr_pixels % 8).unwrap());
+        let bv2 = bv.drain(start..end);
     }
 
-    dst
+    // Set any trailing bits to 0b0
+    bv.set_uninitialized(false);
+
+    Ok(bv.into_vec())
 }
-*/
 
 
 fn _decode_bit_packed_segment(
     src: &[u8], segment_length: usize
 ) -> Result<Vec<u8>, Box<dyn Error>> {
-    //
+    /*
+
+    The returned data is guaranteed to be no more than segment_length in size.
+    */
     let eod = src.len() - 1;
     let mut pos = 0;
     let mut header_byte: usize;
     let mut dst: Vec<u8> = Vec::new();
+    let mut op_len: usize;
+    let mut idx: usize = 0;  // number of bytes decoded
 
     let err_eod = Err(
         String::from(
@@ -402,12 +394,13 @@ fn _decode_bit_packed_segment(
 
             // Check segment for excess padding
             if (idx + op_len) > segment_length {
-                dst.extend([src[pos]; segment_length - idx]);
+                dst.extend(vec![src[pos]; segment_length - idx]);
 
                 return Ok(dst)
             }
 
-            dst.extend([src[pos]; op_len]);
+            dst.extend(vec![src[pos]; op_len]);
+            idx += op_len;
             pos += 1;
         } else if header_byte < 128 {
             // Extend by literally copying the next (N + 1) bytes
@@ -418,13 +411,14 @@ fn _decode_bit_packed_segment(
 
             // Check segment for excess padding
             if (idx + op_len) > segment_length {
-                dst.extend(src[pos..pos + segment_length - idx]);
+                dst.extend(&src[pos..pos + segment_length - idx]);
 
                 return Ok(dst)
             }
 
-            dst.extend(src[pos..pos + op_len]);
+            dst.extend(&src[pos..pos + op_len]);
             pos += header_byte + 1;
+            idx += op_len;
         } // header_byte == 128 is noop
 
         if pos >= eod { return Ok(dst) }
@@ -673,12 +667,14 @@ fn _encode_frame(
         Required if bpp is greater than 1, '>' if `src` is in big endian byte
         order, '<' if little endian.
     */
+
+    // Pre-define our errors for neatness
     let err_invalid_nr_samples = Err(
         String::from("The (0028,0002) 'Samples per Pixel' must be 1 or 3").into()
     );
     let err_invalid_bits_allocated = Err(
         String::from(
-            "The (0028,0100) 'Bits Allocated' value must be 8, 16, 32 or 64"
+            "The (0028,0100) 'Bits Allocated' value must be 1, 8, 16, 32 or 64"
         ).into()
     );
     let err_invalid_nr_segments = Err(
@@ -699,22 +695,31 @@ fn _encode_frame(
         String::from("'byteorder' must be '>' or '<'").into()
     );
 
-    // Check 'Samples per Pixel'
+    // Check 'Samples per Pixel' is either 1 or 3
     match spp {
         1 | 3 => {},
         _ => return err_invalid_nr_samples
     }
 
-    // Check 'Bits Allocated'
+    // Check 'Bits Allocated' is 1 or a multiple of 8
+    let mut is_bit_packed = false;
+    let mut bytes_per_pixel: u8;
     match bpp {
         0 => return err_invalid_bits_allocated,
+        1 => {
+                is_bit_packed = true;
+                bytes_per_pixel = 1;
+        },
         _ => match bpp % 8 {
-            0 => {},
+            0 => {
+                bytes_per_pixel = bpp / 8;
+            },
             _ => return err_invalid_bits_allocated
         }
     }
-    // Ensure `bytes_per_pixel` is in [1, 2, 4, 8]
-    let bytes_per_pixel: u8 = bpp / 8;
+
+    // Check `byteorder` is a valid character
+    // Check *Bits Allocated* is in [1, 8, 16, 32, 64]
     match bytes_per_pixel {
         1 => {},
         2 | 4 | 8 => match byteorder {
@@ -725,11 +730,15 @@ fn _encode_frame(
     }
 
     // Ensure parameters are consistent
+    // TODO: handle unwrap
     let r = usize::try_from(rows).unwrap();
     let c = usize::try_from(cols).unwrap();
 
-    if src.len() != r * c * usize::from(spp * bytes_per_pixel) {
-        return err_invalid_parameters
+    // FIXME: add support for bit-packed data
+    if !is_bit_packed {
+        if src.len() != r * c * usize::from(spp * bytes_per_pixel) {
+            return err_invalid_parameters
+        }
     }
 
     let nr_segments: u8 = spp * bytes_per_pixel;
@@ -766,6 +775,7 @@ fn _encode_frame(
             .cloned()
             .collect();
 
+        // FIXME: `cols` probably not correct for bit-packed data
         _encode_segment_from_vector(segment, dst, cols)?;
     }
 

From 5f3170898d5ee86c7d493e96e60d5040075c1964 Mon Sep 17 00:00:00 2001
From: scaramallion <scaramallion@users.noreply.github.com>
Date: Thu, 5 Jun 2025 15:10:36 +1000
Subject: [PATCH 3/9] Add release note

---
 docs/release_notes/v2.2.0.rst | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100644 docs/release_notes/v2.2.0.rst

diff --git a/docs/release_notes/v2.2.0.rst b/docs/release_notes/v2.2.0.rst
new file mode 100644
index 0000000..1a81007
--- /dev/null
+++ b/docs/release_notes/v2.2.0.rst
@@ -0,0 +1,9 @@
+.. _v2.2.0:
+
+2.2.0
+=====
+
+Enhancements
+............
+
+* Added support for encoding and decoding RLE data with a *Bits Allocated* of 1

From 62b5e722fb756290e6b7a1406cfc0a7104efa81d Mon Sep 17 00:00:00 2001
From: scaramallion <scaramallion@users.noreply.github.com>
Date: Thu, 4 Sep 2025 11:07:38 +1000
Subject: [PATCH 4/9] Probably before rethink

---
 Cargo.toml     |   4 +-
 LICENSE        |   2 +-
 pyproject.toml |   3 +-
 rle/utils.py   |  10 ++-
 src/lib.rs     | 190 ++++++++++++++++++++++++-------------------------
 5 files changed, 101 insertions(+), 108 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index bec581f..a30f956 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "pylibjpeg-rle"
-version = "1.1.0"
+version = "1.2.0"
 authors = ["scaramallion <scaramallion@users.noreply.github.com>"]
 edition = "2024"
 exclude = [".github", "docs", ".codecov.yml", "asv.*", ".gitignore", ".coveragerc"]
@@ -11,5 +11,5 @@ crate-type = ["cdylib"]
 
 
 [dependencies]
-pyo3 = { version = "0.24.2", features = ["extension-module"] }
+pyo3 = { version = "0.25.0", features = ["extension-module"] }
 bitvec = { version = "1.0.1" }
diff --git a/LICENSE b/LICENSE
index 62f3da2..133562c 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2020-2021 scaramallion
+Copyright (c) 2020-2025 scaramallion
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/pyproject.toml b/pyproject.toml
index e79a7b6..d4eca56 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,6 +18,7 @@ classifiers = [
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
+    "Programming Language :: Python :: 3.14",
     "Operating System :: MacOS :: MacOS X",
     "Operating System :: POSIX :: Linux",
     "Operating System :: Microsoft :: Windows",
@@ -59,7 +60,7 @@ omit = [
 ]
 
 [tool.mypy]
-python_version = "3.8"
+python_version = "3.10"
 files = "rle"
 exclude = ["rle/tests", "rle/benchmarks"]
 show_error_codes = true
diff --git a/rle/utils.py b/rle/utils.py
index c9e491f..376c084 100644
--- a/rle/utils.py
+++ b/rle/utils.py
@@ -26,8 +26,6 @@ def decode_pixel_data(
 ) -> Union[np.ndarray, bytearray]:
     """Return the decoded RLE Lossless data as a :class:`numpy.ndarray`.
 
-    Intended for use with *pydicom* ``Dataset`` objects.
-
     Parameters
     ----------
     src : bytes
@@ -192,11 +190,11 @@ def encode_pixel_data(
     **kwargs
         If `ds` is not used then the following are required:
 
-        * ``'rows': int`` the number of rows contained in `src`
-        * ``'columns': int`` the number of columns contained in `src`
-        * ``samples_per_pixel': int`` the number of samples per pixel, either
+        * ``'rows'``: :class:`int` the number of rows contained in `src`
+        * ``'columns'``: :class:`int` the number of columns contained in `src`
+        * ``'samples_per_pixel'``: :class:`int` the number of samples per pixel, either
           1 for monochrome or 3 for RGB or similar data.
-        * ``'bits_allocated': int`` the number of bits needed to contain each
+        * ``'bits_allocated'``: :class:`int` the number of bits needed to contain each
           pixel, either 1, 8, 16, 32 or 64.
 
     Returns
diff --git a/src/lib.rs b/src/lib.rs
index d97e4a5..aa04348 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,7 +2,7 @@
 use std::convert::TryFrom;
 use std::error::Error;
 
-use bitvec::prelude::*;
+// use bitvec::prelude::*;
 
 use pyo3::prelude::*;
 use pyo3::wrap_pyfunction;
@@ -60,7 +60,7 @@ fn _parse_header(src: &[u8; 64]) -> [u32; 15] {
     Parameters
     ----------
     src
-        The 64 byte RLE header.
+        The 64 byte RLE header containing 15 little-endian ordered offset values.
     */
     return [
         u32::from_le_bytes([ src[4],  src[5],  src[6],  src[7]]),
@@ -158,7 +158,7 @@ fn _decode_frame(
 
     // Check 'Bits Allocated' is 1 or a multiple of 8
     let mut is_bit_packed = false;
-    let mut bytes_per_pixel: u8;
+    let mut bytes_per_pixel: u8;  // Valid values are 1 | 2 | 4 | 8
     match bpp {
         0 => return err_invalid_bits_allocated,
         1 => {
@@ -193,6 +193,7 @@ fn _decode_frame(
     // Don't need to check the unwrap as we just checked
     //   there's enough data in `src`
     let header = <&[u8; 64]>::try_from(&src[0..64]).unwrap();
+    // All 15 header offsets, however no guarantee they will be non-zero
     let all_offsets: [u32; 15] = _parse_header(header);
 
     // First offset must always be 64
@@ -221,13 +222,19 @@ fn _decode_frame(
     // Check the samples per pixel is conformant
     let spp: u8 = nr_segments / bytes_per_pixel;
     match spp {
-        1 | 3 => {},
+        1 => {},
+        3 => {
+            // Bit packed data must be 1 sample per pixel
+            match bpp {
+                1 => { return err_invalid_nr_samples },
+                _ => {}
+            }
+        },
         _ => return err_invalid_nr_samples
     }
 
-    /*
-    Example
-    -------
+    /* Example
+    ----------
     RLE encoded data is ordered like this (for 16-bit, 3 sample):
       Segment: 1     | 2     | 3     | 4     | 5     | 6
                R MSB | R LSB | G MSB | G LSB | B MSB | B LSB
@@ -242,23 +249,6 @@ fn _decode_frame(
       MSB LSB MSB LSB ... MSB LSB | MSB LSB MSB LSB ... MSB LSB | ...
     */
 
-    /*
-    Bit-packed data
-    ---------------
-
-    For bit-packed data (i.e. a *Bits Allocated* of 1), we decode the RLE encoding but
-    leave the bit-packing in place. This means the length of each decoded segment is not
-    equal to the number of pixels but instead the number of pixels / 8, rounded up to the
-    nearest whole integer with the difference being accounted for by padding 0b0 bits.
-
-    Each decoded segment must have these padding bits removed and the final frame should be
-    the concatenated unpadded segments. Fortunately for bit-packed data there can be at
-    most 3 segments (with a *Samples per Pixel* of 3).
-
-    Might be a good idea to use bool arrays instead of u8 and implicitly unpack to avoid
-    annoyances with multi-segment padding, then convert to u8 at the end.
-    */
-
     // Decode each segment and place it into the vector
     // ------------------------------------------------
     // TODO: handle unwrap
@@ -303,6 +293,7 @@ fn _decode_frame(
                     usize::from(bytes_per_pixel),
                     usize::from(byte_offset) + so
                 )?;
+
                 if len != pps { return err_segment_length }
             }
         }
@@ -310,50 +301,32 @@ fn _decode_frame(
         return Ok(frame)
     }
 
-    // Bit-packed data
-    // The number of whole bytes per segment after decoding
-    // TODO: handle unwraps
-    let mut bytes_per_segment = usize::try_from(nr_pixels / 8).unwrap();
-    match nr_pixels % 8 {
-        0 => {},
-        _ => { bytes_per_segment += 1); },
-    }
-
-    let mut frame: Vec<u8> = Vec::new();
-    for idx in 0..(offsets.len() - 1) {  // 0..1 or 0..3
-        // Start and end indices of the segment
-        let start = usize::try_from(offsets[idx]).unwrap();
-        let end = usize::try_from(offsets[idx + 1]).unwrap();
-        // TODO: match here on the unwrap
-        let segment = _decode_bit_packed_segment(
-            <&[u8]>::try_from(&src[start..end]).unwrap(),
-            usize::from(bytes_per_segment),
-        )?;
-
-        if segment.len() != bytes_per_segment { return err_segment_length }
-
-        if spp == 1 { return Ok(segment) }
+    /* Bit-packed data
+    ------------------
+    For bit-packed data (i.e. a *Bits Allocated* of 1), we decode the RLE encoded data but
+    leave the bit-packing in place. This means the length of each decoded segment is not
+    equal to the number of pixels but instead the number of pixels / 8, rounded up to the
+    nearest whole integer, with the difference being accounted for by 0b0 padding bits.
+    */
 
-        frame.extend(segment);
+    // The expected number of whole bytes per segment after decoding
+    let mut bytes_per_segment: usize;
+    match nr_pixels % 8 {
+        0 => { bytes_per_segment = pps / 8; },
+        _ => { bytes_per_segment = (pps + (8 - pps % 8)) / 8; },
     }
 
-    // Multiple samples but byte aligned
-    if nr_pixels % 8 == 0 { return Ok(frame) }
-
-    // Multiple samples but not byte aligned
-    //  -> need to remove the bit padding between segments
-    // TODO: handle unwraps
-    let mut bv = BitVec::<_, Msb0>::from_vec(frame);
-    let step = usize::try_from(nr_pixels).unwrap();
-    for start in (step..bv.len() - 1).step_by(step) {
-        let end = start + (8 - usize::try_from(nr_pixels % 8).unwrap());
-        let bv2 = bv.drain(start..end);
-    }
+    // For bit-packed data we can only have 1 segment.
+    let start = usize::try_from(offsets[0]).unwrap();
+    let end = usize::try_from(offsets[1]).unwrap();
+    let segment = _decode_bit_packed_segment(
+        <&[u8]>::try_from(&src[start..end]).unwrap(),
+        bytes_per_segment,
+    )?;
 
-    // Set any trailing bits to 0b0
-    bv.set_uninitialized(false);
+    if segment.len() != bytes_per_segment) { return err_segment_length }
 
-    Ok(bv.into_vec())
+    return Ok(segment)
 }
 
 
@@ -373,8 +346,7 @@ fn _decode_bit_packed_segment(
 
     let err_eod = Err(
         String::from(
-            "The end of the data was reached before the segment was \
-            completely decoded"
+            "The end of the data was reached before the segment was completely decoded"
         ).into()
     );
 
@@ -625,7 +597,7 @@ fn encode_frame<'py>(
     spp : int
         The number of samples per pixel, supported values are 1 or 3.
     bpp : int
-        The number of bits per pixel, supported values are 8, 16, 32 and 64.
+        The number of bits per pixel, supported values are 1, 8, 16, 32 and 64.
     byteorder : str
         Required if `bpp` is greater than 1, '>' if `src` is in big endian byte
         order, '<' if little endian.
@@ -651,8 +623,8 @@ fn _encode_frame(
     Parameters
     ----------
     src
-        The data to be RLE encoded, ordered as R1, G1, B1, R2, G2, B2, ...,
-        Rn, Gn, Bn (i.e. Planar Configuration 0).
+        The data to be RLE encoded, with multi-sample data ordered as R1, G1, B1,
+        R2, G2, B2, ..., Rn, Gn, Bn (i.e. Planar Configuration 0).
     dst
         The vector storing the encoded data.
     rows
@@ -660,9 +632,10 @@ fn _encode_frame(
     cols
         The number of columns in the data.
     spp
-        The number of samples per pixel, supported values are 1 or 3.
+        The number of samples per pixel, supported values are 1 or 3. May only be 1 if `bpp`
+        is 1.
     bpp
-        The number of bits per pixel, supported values are 8, 16, 32 and 64.
+        The number of bits per pixel, supported values are 1, 8, 16, 32 and 64.
     byteorder
         Required if bpp is greater than 1, '>' if `src` is in big endian byte
         order, '<' if little endian.
@@ -696,26 +669,31 @@ fn _encode_frame(
     );
 
     // Check 'Samples per Pixel' is either 1 or 3
-    match spp {
-        1 | 3 => {},
-        _ => return err_invalid_nr_samples
-    }
-
     // Check 'Bits Allocated' is 1 or a multiple of 8
+    // Check 'Samples per Pixel' is 1 if 'Bits Allocated' is 1
     let mut is_bit_packed = false;
     let mut bytes_per_pixel: u8;
-    match bpp {
-        0 => return err_invalid_bits_allocated,
+    match spp {
         1 => {
-                is_bit_packed = true;
-                bytes_per_pixel = 1;
-        },
-        _ => match bpp % 8 {
-            0 => {
-                bytes_per_pixel = bpp / 8;
-            },
-            _ => return err_invalid_bits_allocated
+            match bpp {
+                1 => {
+                    is_bit_packed = true;
+                    bytes_per_pixel = 1;
+                },
+                8 | 16 | 32 | 64 => {
+                    bytes_per_pixel = bpp / 8;
+                },
+                _ => { return err_invalid_bits_allocated }
+            }
         }
+        3 => {
+            match bpp {
+                1 => { return err_invalid_nr_samples },
+                8 | 16 | 32 | 64 => { bytes_per_pixel = bpp / 8; },
+                _ => { return err_invalid_bits_allocated }
+            }
+        },
+        _ => return err_invalid_nr_samples
     }
 
     // Check `byteorder` is a valid character
@@ -734,11 +712,17 @@ fn _encode_frame(
     let r = usize::try_from(rows).unwrap();
     let c = usize::try_from(cols).unwrap();
 
-    // FIXME: add support for bit-packed data
+    let total_pixels = r * c * usize::from(spp);
     if !is_bit_packed {
-        if src.len() != r * c * usize::from(spp * bytes_per_pixel) {
-            return err_invalid_parameters
+        let total_length = total_pixels * usize::from(bytes_per_pixel);
+        if src.len() != total_length { return err_invalid_parameters }
+    } else {
+        let mut total_length: usize;
+        match total_pixels % 8 {
+            0 => { total_length = total_pixels / 8; },
+            _ => { total_length = (total_pixels + (8 - total_pixels % 8)) / 8; }
         }
+        if src.len() != total_length { return err_invalid_parameters }
     }
 
     let nr_segments: u8 = spp * bytes_per_pixel;
@@ -749,10 +733,11 @@ fn _encode_frame(
     dst.extend(u32::from(nr_segments).to_le_bytes().to_vec());
     dst.extend([0u8; 60].to_vec());
 
-    // A vector of the start indexes used when segmenting - default big endian
+    // A vector of the start indexes used when segmenting
+    // Start with big-endian ordered pixel sample values
     let mut start_indices: Vec<usize> = (0..usize::from(nr_segments)).collect();
     if byteorder != '>' {
-        // `src` has little endian byte ordering
+        // Typically `src` uses little endian byte ordering
         for idx in 0..spp {
             let s = usize::from(idx * bytes_per_pixel);
             let e = usize::from((idx + 1) * bytes_per_pixel);
@@ -761,6 +746,8 @@ fn _encode_frame(
     }
 
     // Encode the data and update the RLE header segment offsets
+    // Segments are ordered from most significant byte to least significant for
+    //  multi-byte values
     for idx in 0..usize::from(nr_segments) {
         // Update RLE header: convert current offset to 4x le ordered u8s
         let current_offset = (u32::try_from(dst.len()).unwrap()).to_le_bytes();
@@ -769,14 +756,21 @@ fn _encode_frame(
         }
 
         // Encode! Note the offset start of the `src` iter
-        let segment: Vec<u8> = src[start_indices[idx]..]
-            .into_iter()
-            .step_by(usize::from(spp * bytes_per_pixel))
-            .cloned()
-            .collect();
-
-        // FIXME: `cols` probably not correct for bit-packed data
-        _encode_segment_from_vector(segment, dst, cols)?;
+        if !is_bit_packed {
+            let segment: Vec<u8> = src[start_indices[idx]..]
+                .into_iter()
+                .step_by(usize::from(spp * bytes_per_pixel))
+                .cloned()
+                .collect();
+
+            _encode_segment_from_vector(segment, dst, cols)?;
+        } else {
+            // Should only ever be 1 sample per pixel -> 1 segment
+            // cols is wrong here, should be / 8
+            // Also the DICOM Standard says each row should be encoded separately,
+            //  what do we do if the number of columns isn't divisble by 8?
+            _encode_segment_from_vector(src, dst, cols)?;
+        }
     }
 
     Ok(())

From 0f611904d30cac189ffb4f886f5059a23dc10826 Mon Sep 17 00:00:00 2001
From: scaramallion <scaramallion@users.noreply.github.com>
Date: Fri, 5 Sep 2025 16:06:09 +1000
Subject: [PATCH 5/9] Rework to fit standard, add pack_bits and unpack_bits

---
 .github/workflows/pytest-builds.yml  |   5 +-
 .github/workflows/release-wheels.yml | 127 ++++++-----
 Cargo.lock                           |  90 ++------
 Cargo.toml                           |   3 +-
 pyproject.toml                       |   6 +-
 rle/__init__.py                      |   9 +-
 rle/tests/test_decode.py             | 132 +++---------
 rle/tests/test_encode.py             |  12 +-
 rle/tests/test_utils.py              | 305 ++++++++++++++++++++++++++-
 rle/utils.py                         | 132 +++++++++---
 src/lib.rs                           | 298 ++++++++++++++++----------
 11 files changed, 750 insertions(+), 369 deletions(-)

diff --git a/.github/workflows/pytest-builds.yml b/.github/workflows/pytest-builds.yml
index b9bfd80..4022531 100644
--- a/.github/workflows/pytest-builds.yml
+++ b/.github/workflows/pytest-builds.yml
@@ -12,11 +12,11 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ['3.10', '3.11', '3.12', '3.13']
+        python-version: ['3.10', '3.11', '3.12', '3.13', '3.14']
         os: [ubuntu-latest, windows-latest, macos-latest]
 
     steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v5
       with:
         fetch-depth: 2
 
@@ -24,6 +24,7 @@ jobs:
       uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
+        allow-prereleases: true
 
     - name: Install Rust (stable)
       run:
diff --git a/.github/workflows/release-wheels.yml b/.github/workflows/release-wheels.yml
index c8c878d..a3e7cf9 100644
--- a/.github/workflows/release-wheels.yml
+++ b/.github/workflows/release-wheels.yml
@@ -13,7 +13,7 @@ jobs:
     runs-on: ubuntu-latest
     timeout-minutes: 10
     steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v5
       with:
         submodules: true
 
@@ -58,6 +58,9 @@ jobs:
           - os: windows-latest
             python: 313
             platform_id: win32
+          - os: windows-latest
+            python: 314
+            platform_id: win32
 
           # Windows 64 bit
           - os: windows-latest
@@ -72,6 +75,9 @@ jobs:
           - os: windows-latest
             python: 313
             platform_id: win_amd64
+          - os: windows-latest
+            python: 314
+            platform_id: win_amd64
 
           # Linux 64 bit manylinux2014
           - os: ubuntu-latest
@@ -90,6 +96,10 @@ jobs:
             python: 313
             platform_id: manylinux_x86_64
             manylinux_image: manylinux2014
+          - os: ubuntu-latest
+            python: 314
+            platform_id: manylinux_x86_64
+            manylinux_image: manylinux2014
 
           # Linux aarch64
           - os: ubuntu-latest
@@ -104,6 +114,9 @@ jobs:
           - os: ubuntu-latest
             python: 313
             platform_id: manylinux_aarch64
+          - os: ubuntu-latest
+            python: 314
+            platform_id: manylinux_aarch64
 
           # MacOS x86_64
           - os: macos-latest
@@ -118,6 +131,9 @@ jobs:
           - os: macos-latest
             python: 313
             platform_id: macosx_x86_64
+          - os: macos-latest
+            python: 314
+            platform_id: macosx_x86_64
 
           # MacOS arm64
           - os: macos-latest
@@ -132,9 +148,12 @@ jobs:
           - os: macos-latest
             python: 313
             platform_id: macosx_arm64
+          - os: macos-latest
+            python: 314
+            platform_id: macosx_arm64
 
     steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v5
       with:
         submodules: true
 
@@ -157,7 +176,7 @@ jobs:
       run: |
         python -m pip install -U pip
         python -m pip install -U setuptools-rust
-        python -m pip install cibuildwheel>=2.23
+        python -m pip install cibuildwheel>=3.1.3
 
     - name: Build wheels
       env:
@@ -179,63 +198,63 @@ jobs:
         name: wheel-${{ matrix.python }}-${{ matrix.platform_id }}
         path: ./dist
 
-  test-package:
-    name: Test built package
-    needs: [ build-wheels, build-sdist ]
-    runs-on: ubuntu-latest
-    timeout-minutes: 30
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: ['3.10', '3.11', '3.12', '3.13']
-
-    steps:
-    - name: Install Rust (stable)
-      run:
-        curl https://sh.rustup.rs -sSf | sh -s -- -y
-
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v5
-      with:
-        python-version: ${{ matrix.python-version }}
-
-    - name: Download the wheels
-      uses: actions/download-artifact@v4
-      with:
-        path: dist/
-        merge-multiple: true
-
-    - name: Install from package wheels and test
-      run: |
-        python -m venv testwhl
-        source testwhl/bin/activate
-        python -m pip install -U pip
-        python -m pip install -U pytest pydicom pylibjpeg
-        python -m pip uninstall -y pylibjpeg-rle
-        python -m pip install git+https://github.com/pydicom/pylibjpeg-data
-        python -m pip install -U --pre --find-links dist/ pylibjpeg-rle
-        python -m pytest --pyargs rle.tests
-        deactivate
-
-    - name: Install from package tarball and test
-      run: |
-        python -m venv testsrc
-        source testsrc/bin/activate
-        python -m pip install -U pip
-        python -m pip install -U pytest pydicom pylibjpeg
-        python -m pip uninstall -y pylibjpeg-rle
-        python -m pip install git+https://github.com/pydicom/pylibjpeg-data
-        export PATH="$PATH:$HOME/.cargo/bin"
-        python -m pip install -U dist/pylibjpeg*rle-*.tar.gz
-        python -m pytest --pyargs rle.tests
-        deactivate
+  # test-package:
+  #   name: Test built package
+  #   needs: [ build-wheels, build-sdist ]
+  #   runs-on: ubuntu-latest
+  #   timeout-minutes: 30
+  #   strategy:
+  #     fail-fast: false
+  #     matrix:
+  #       python-version: ['3.10', '3.11', '3.12', '3.13', '3.14']
+  #
+  #   steps:
+  #   - name: Install Rust (stable)
+  #     run:
+  #       curl https://sh.rustup.rs -sSf | sh -s -- -y
+  #
+  #   - name: Set up Python ${{ matrix.python-version }}
+  #     uses: actions/setup-python@v5
+  #     with:
+  #       python-version: ${{ matrix.python-version }}
+  #
+  #   - name: Download the wheels
+  #     uses: actions/download-artifact@v4
+  #     with:
+  #       path: dist/
+  #       merge-multiple: true
+  #
+  #   - name: Install from package wheels and test
+  #     run: |
+  #       python -m venv testwhl
+  #       source testwhl/bin/activate
+  #       python -m pip install -U pip
+  #       python -m pip install -U pytest pydicom pylibjpeg
+  #       python -m pip uninstall -y pylibjpeg-rle
+  #       python -m pip install git+https://github.com/pydicom/pylibjpeg-data
+  #       python -m pip install -U --pre --find-links dist/ pylibjpeg-rle
+  #       python -m pytest --pyargs rle.tests
+  #       deactivate
+  #
+  #   - name: Install from package tarball and test
+  #     run: |
+  #       python -m venv testsrc
+  #       source testsrc/bin/activate
+  #       python -m pip install -U pip
+  #       python -m pip install -U pytest pydicom pylibjpeg
+  #       python -m pip uninstall -y pylibjpeg-rle
+  #       python -m pip install git+https://github.com/pydicom/pylibjpeg-data
+  #       export PATH="$PATH:$HOME/.cargo/bin"
+  #       python -m pip install -U dist/pylibjpeg*rle-*.tar.gz
+  #       python -m pytest --pyargs rle.tests
+  #       deactivate
 
   # The pypi upload fails with non-linux containers, so grab the uploaded
   #     artifacts and run using those
   # See: https://github.com/pypa/gh-action-pypi-publish/discussions/15
   deploy:
     name: Upload wheels to PyPI
-    needs: [ test-package ]
+    needs: [ build-wheels ]
     runs-on: ubuntu-latest
     environment:
       name: pypi
diff --git a/Cargo.lock b/Cargo.lock
index fcc00f6..354d199 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4,33 +4,9 @@ version = 4
 
 [[package]]
 name = "autocfg"
-version = "1.4.0"
+version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
-
-[[package]]
-name = "bitvec"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c"
-dependencies = [
- "funty",
- "radium",
- "tap",
- "wyz",
-]
-
-[[package]]
-name = "cfg-if"
-version = "1.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
-
-[[package]]
-name = "funty"
-version = "2.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
+checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
 
 [[package]]
 name = "heck"
@@ -46,9 +22,9 @@ checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd"
 
 [[package]]
 name = "libc"
-version = "0.2.172"
+version = "0.2.175"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa"
+checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543"
 
 [[package]]
 name = "memoffset"
@@ -67,34 +43,32 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
 
 [[package]]
 name = "portable-atomic"
-version = "1.11.0"
+version = "1.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e"
+checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483"
 
 [[package]]
 name = "proc-macro2"
-version = "1.0.95"
+version = "1.0.101"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778"
+checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
 dependencies = [
  "unicode-ident",
 ]
 
 [[package]]
 name = "pylibjpeg-rle"
-version = "1.1.0"
+version = "1.2.0"
 dependencies = [
- "bitvec",
  "pyo3",
 ]
 
 [[package]]
 name = "pyo3"
-version = "0.24.2"
+version = "0.26.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e5203598f366b11a02b13aa20cab591229ff0a89fd121a308a5df751d5fc9219"
+checksum = "7ba0117f4212101ee6544044dae45abe1083d30ce7b29c4b5cbdfa2354e07383"
 dependencies = [
- "cfg-if",
  "indoc",
  "libc",
  "memoffset",
@@ -108,19 +82,18 @@ dependencies = [
 
 [[package]]
 name = "pyo3-build-config"
-version = "0.24.2"
+version = "0.26.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "99636d423fa2ca130fa5acde3059308006d46f98caac629418e53f7ebb1e9999"
+checksum = "4fc6ddaf24947d12a9aa31ac65431fb1b851b8f4365426e182901eabfb87df5f"
 dependencies = [
- "once_cell",
  "target-lexicon",
 ]
 
 [[package]]
 name = "pyo3-ffi"
-version = "0.24.2"
+version = "0.26.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "78f9cf92ba9c409279bc3305b5409d90db2d2c22392d443a87df3a1adad59e33"
+checksum = "025474d3928738efb38ac36d4744a74a400c901c7596199e20e45d98eb194105"
 dependencies = [
  "libc",
  "pyo3-build-config",
@@ -128,9 +101,9 @@ dependencies = [
 
 [[package]]
 name = "pyo3-macros"
-version = "0.24.2"
+version = "0.26.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0b999cb1a6ce21f9a6b147dcf1be9ffedf02e0043aec74dc390f3007047cecd9"
+checksum = "2e64eb489f22fe1c95911b77c44cc41e7c19f3082fc81cce90f657cdc42ffded"
 dependencies = [
  "proc-macro2",
  "pyo3-macros-backend",
@@ -140,9 +113,9 @@ dependencies = [
 
 [[package]]
 name = "pyo3-macros-backend"
-version = "0.24.2"
+version = "0.26.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "822ece1c7e1012745607d5cf0bcb2874769f0f7cb34c4cde03b9358eb9ef911a"
+checksum = "100246c0ecf400b475341b8455a9213344569af29a3c841d29270e53102e0fcf"
 dependencies = [
  "heck",
  "proc-macro2",
@@ -160,29 +133,17 @@ dependencies = [
  "proc-macro2",
 ]
 
-[[package]]
-name = "radium"
-version = "0.7.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09"
-
 [[package]]
 name = "syn"
-version = "2.0.101"
+version = "2.0.106"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf"
+checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6"
 dependencies = [
  "proc-macro2",
  "quote",
  "unicode-ident",
 ]
 
-[[package]]
-name = "tap"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
-
 [[package]]
 name = "target-lexicon"
 version = "0.13.2"
@@ -200,12 +161,3 @@ name = "unindent"
 version = "0.2.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3"
-
-[[package]]
-name = "wyz"
-version = "0.5.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed"
-dependencies = [
- "tap",
-]
diff --git a/Cargo.toml b/Cargo.toml
index a30f956..144a8ed 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,5 +11,4 @@ crate-type = ["cdylib"]
 
 
 [dependencies]
-pyo3 = { version = "0.25.0", features = ["extension-module"] }
-bitvec = { version = "1.0.1" }
+pyo3 = { version = "0.26.0", features = ["extension-module"] }
diff --git a/pyproject.toml b/pyproject.toml
index d4eca56..ab085eb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -54,9 +54,13 @@ homepage = "https://github.com/pydicom/pylibjpeg-rle"
 [project.entry-points."pylibjpeg.pixel_data_encoders"]
 "1.2.840.10008.1.2.5" = "rle:encode_pixel_data"
 
+[project.entry-points."pylibjpeg.utils"]
+"pack_bits" = "rle:pack_bits"
+"unpack_bits" = "rle:unpack_bits"
+
 [tool.coverage.run]
 omit = [
-    "libjpeg/tests/*",
+    "rle/tests/*",
 ]
 
 [tool.mypy]
diff --git a/rle/__init__.py b/rle/__init__.py
index 8a79be0..2b21cf7 100644
--- a/rle/__init__.py
+++ b/rle/__init__.py
@@ -1,4 +1,11 @@
 """Set package shortcuts."""
 
 from rle._version import __version__
-from rle.utils import pixel_array, generate_frames, decode_pixel_data, encode_pixel_data
+from rle.utils import (
+    pixel_array,
+    generate_frames,
+    decode_pixel_data,
+    encode_pixel_data,
+    pack_bits,
+    unpack_bits,
+)
diff --git a/rle/tests/test_decode.py b/rle/tests/test_decode.py
index 13247dd..2f7368b 100644
--- a/rle/tests/test_decode.py
+++ b/rle/tests/test_decode.py
@@ -126,6 +126,28 @@ def test_invalid_samples_px_raises(self):
         with pytest.raises(ValueError, match=msg):
             decode_frame(d + b"\x00" * 8, 1, 8, "<")
 
+        # Bits Allocated 1 must be Samples per Pixel 1
+        header = (
+            b"\x03\x00\x00\x00"  # 3 segments
+            b"\x40\x00\x00\x00"  # 64
+            b"\x47\x00\x00\x00"  # 71
+            b"\x4E\x00\x00\x00"  # 78
+        )
+        header += (64 - len(header)) * b"\x00"
+        # 2 x 3 data
+        # 0, 64, 128, 160, 192, 255
+        data = (
+            b"\x05\x00\x40\x80\xA0\xC0\xFF"  # R
+            b"\x05\xFF\xC0\x80\x40\x00\xFF"  # B
+            b"\x05\x01\x40\x80\xA0\xC0\xFE"  # G
+        )
+        msg = (
+            r"The \(0028,0002\) 'Samples per Pixel' must be 1 if \(0028,0100\) 'Bits "
+            r"Allocated' is 1"
+        )
+        with pytest.raises(ValueError, match=msg):
+            decoded = decode_frame(header + data, 2 * 3, 1, "<")
+
     def test_insufficient_frame_literal(self):
         """Test segment with excess padding on lit."""
         d = self.as_bytes([64])
@@ -361,109 +383,19 @@ def test_u8_1s_bs1(self):
         """Test decoding bit packed 1 sample/px."""
         header = b"\x01\x00\x00\x00\x40\x00\x00\x00"
         header += (64 - len(header)) * b"\x00"
-        # 0, 64, 128, 160, 192, 255
-        data = b"\x05\x00\x40\x80\xA0\xC0\xFF"
-        # Big endian
-        # 48 px - byte aligned in 6 bytes
-        decoded = decode_frame(header + data, 6 * 8, 1, ">")
-        arr = np.frombuffer(decoded, np.dtype("uint8"))
-        assert [0, 64, 128, 160, 192, 255] == arr.tolist()
-
-        # 37 px -> 5 bytes
-        decoded = decode_frame(header + data, 4 * 8 + 5, 1, ">")
-        arr = np.frombuffer(decoded, np.dtype("uint8"))
-        assert [0, 64, 128, 160, 192] == arr.tolist()
-
-        # 2 px -> 1 byte
-        decoded = decode_frame(header + data, 2, 1, ">")
-        arr = np.frombuffer(decoded, np.dtype("uint8"))
-        assert [0] == arr.tolist()
-
-        # Little-endian
-        decoded = decode_frame(header + data, 6 * 8, 1, "<")
-        arr = np.frombuffer(decoded, np.dtype("uint8"))
-        assert [0, 64, 128, 160, 192, 255] == arr.tolist()
-
-        decoded = decode_frame(header + data, 4 * 8 + 5, 1, "<")
-        arr = np.frombuffer(decoded, np.dtype("uint8"))
-        assert [0, 64, 128, 160, 192] == arr.tolist()
-
-        decoded = decode_frame(header + data, 2, 1, "<")
-        arr = np.frombuffer(decoded, np.dtype("uint8"))
-        assert [0] == arr.tolist()
-
-    def test_u8_3s_bs1(self):
-        """Test decoding bit packed 3 sample/px."""
-        header = (
-            b"\x03\x00\x00\x00"  # 3 segments
-            b"\x40\x00\x00\x00"  # 64
-            b"\x47\x00\x00\x00"  # 71
-            b"\x4E\x00\x00\x00"  # 78
-        )
-        header += (64 - len(header)) * b"\x00"
-        # 0, 64, 128, 160, 192, 255
-        data = (
-            b"\x05\x00\x40\x80\xA0\xC0\xFF"  # R
-            b"\x05\x7F\xC0\x80\x40\x00\xFF"  # B
-            b"\x05\x01\x40\x80\xA0\xC0\xFE"  # G
-        )
-        # 48 px - byte aligned in 18 bytes
-        decoded = decode_frame(header + data, 6 * 8, 1, "<")
+        # 0 0 0 0 0 1 0 1 0 1 1 0 1 1 1 1
+        data = b"\xFC\x00\x07\x01\x00\x01\x00\x01\x01\x00\x01\xFD\x01\x00"
+        decoded = decode_frame(header + data, 16, 1, ">")
         arr = np.frombuffer(decoded, np.dtype("uint8"))
-        # Ordered all R, all G, all B
-        assert [0, 64, 128, 160, 192, 255] == arr[:6].tolist()
-        assert [127, 192, 128, 64, 0, 255] == arr[6:12].tolist()
-        assert [1, 64, 128, 160, 192, 254] == arr[12:].tolist()
+        assert [0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1] == arr.tolist()
+        decoded = decode_frame(header + data, 16, 1, "<")
+        assert [0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1] == arr.tolist()
 
-        # 47 px - non-byte aligned in 18 bytes
-        decoded = decode_frame(header + data, 6 * 8 - 1, 1, "<")
-        arr = np.frombuffer(decoded, np.dtype("uint8"))
-        # Boundaries are 47 | 94 | 141
-        #                       v removed
-        # 255 | 127: 0b[1111_111x 0b0]111_1111 -> 0b1111_1110
-        assert [0, 64, 128, 160, 192, 254] == arr[:6].tolist()
-        # Left shift values by 1 bit
-        # 127 | 192: 0b0[111_1111 0b1]100_0000 -> 0b1111_1111
-        # 192 | 128: 0b1[100_0000 0b1]000_0000 -> 0b1000_0001
-        # 128 |  64: 0b1[000_0000 0b0]100_0000 -> 0b0000_0000
-        #  64 |   0: 0b0[100_0000 0b0]000_0000 -> 0b1000_0000
-        #   0 | 255: 0b0[000_0000 0b1]111_1111 -> 0b0000_0001
-        #                       v removed
-        # 255 |   1: 0b1[111_111x 0b00]00_0001 -> 0b1111_1100
-        assert [255, 129, 0, 128, 1, 252] == arr[6:12].tolist()
-        # Left shift values by 2 bits
-        #   1 |  64: 0b00[00_0001 0b01]00_0000 -> 0b0000_0101
-        #  64 | 128: 0b01[00_0000 0b10]00_0000 -> 0b0000_0010
-        # 128 | 160: 0b10[00_0000 0b10]10_0000 -> 0b0000_0010
-        # 160 | 192: 0b10[10_0000 0b11]00_0000 -> 0b1000_0011
-        # 192 | 254: 0b11[00_0000 0b11]11_1110 -> 0b0000_0011
-        #                       v removed
-        # 254 |   x: 0b11[11_111x             -> 0b1111_1000
-        assert [5, 2, 2, 131, 3, 248] == arr[12:].tolist()
-
-        # 41 px - non-byte aligned in 16 bytes
-        decoded = decode_frame(header + data, 5 * 8 + 1, 1, "<")
+        # 0 0 0 0 0 1 0 1 0 1 1 0 1 1 1
+        data = b"\xFC\x00\x07\x01\x00\x01\x00\x01\x01\x00\x01\xFE\x01\x00"
+        decoded = decode_frame(header + data, 15, 1, ">")
         arr = np.frombuffer(decoded, np.dtype("uint8"))
-        # Boundaries are 48 | 96 | 144 -> 41 | 82 | 123
-        #                vvv vvvv removed
-        # 255 | 255: 0b[1xxx_xxxx 0b0111_111]1 -> 0b1011_1111
-        assert [0, 64, 128, 160, 192, 191] == arr[:6].tolist()
-        # Left shift values by 7 bits
-        # 255 | 192: 0b1111_111[1 0b1100_000]0 -> 0b1110_0000
-        # 192 | 128: 0b1100_000[0 0b1000_000]0 -> 0b0100_0000
-        # 128 |  64: 0b1000_000[0 0b0100_000]0 -> 0b0010_0000
-        #  64 |   0: 0b0100_000[0 0b0000_000]0 -> 0b0000_0000
-        #                                  vvv vvvv removed
-        #   0 | 255 |   1: 0b0000_000[0 0b1xxx_xxxx 0b0000_00]01 -> 0b0100_0000
-        assert [224, 64, 32, 0, 64] == arr[6:11].tolist()
-        # Left shift values by 14 bits
-        #   1 |  64: 0b0000_00[01 0b0100_00]00 -> 0b0101_0000
-        #  64 | 128: 0b0100_00[00 0b1000_00]00 -> 0b0010_0000
-        # 128 | 160: 0b1000_00[00 0b1010_00]00 -> 0b0010_1000
-        # 160 | 192: 0b1010_00[00 0b1100_00]00 -> 0b0011_0000
-        #                           vvv vvvv removed
-        # 192 | 254: 0b1100_[00 0b1]xxx_xxxx   -> 0b0010_0000
-        assert [80, 32, 40, 48, 32] == arr[11:].tolist()
+        assert [0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1] == arr.tolist()
 
 
 @pytest.mark.skipif(not HAVE_PYDICOM, reason="No pydicom")
diff --git a/rle/tests/test_encode.py b/rle/tests/test_encode.py
index 4548c10..d4b10eb 100644
--- a/rle/tests/test_encode.py
+++ b/rle/tests/test_encode.py
@@ -386,15 +386,19 @@ def test_invalid_samples_per_pixel_raises(self):
         with pytest.raises(ValueError, match=msg):
             encode_frame(b"", 1, 1, 4, 1, "<")
 
+        msg = (
+            r"The \(0028,0002\) 'Samples per Pixel' must be 1 if \(0028,0100\) 'Bits "
+            "Allocated' is 1"
+        )
+        with pytest.raises(ValueError, match=msg):
+            encode_frame(b"", 1, 1, 3, 1, "<")
+
     def test_invalid_bits_per_pixel_raises(self):
         """Test exception raised if bits per pixel not valid."""
-        msg = r"The \(0028,0100\) 'Bits Allocated' value must be 8, 16, 32 or 64"
+        msg = r"The \(0028,0100\) 'Bits Allocated' value must be 1, 8, 16, 32 or 64"
         with pytest.raises(ValueError, match=msg):
             encode_frame(b"", 1, 1, 1, 0, "<")
 
-        with pytest.raises(ValueError, match=msg):
-            encode_frame(b"", 1, 1, 1, 1, "<")
-
         with pytest.raises(ValueError, match=msg):
             encode_frame(b"", 1, 1, 1, 7, "<")
 
diff --git a/rle/tests/test_utils.py b/rle/tests/test_utils.py
index 87c66ee..3fe248e 100644
--- a/rle/tests/test_utils.py
+++ b/rle/tests/test_utils.py
@@ -1,5 +1,7 @@
 """Tests for the utils module."""
 
+from struct import pack, unpack
+
 import numpy as np
 import pytest
 
@@ -17,12 +19,44 @@
     from pydicom.pixels.decoders.rle import _rle_decode_frame
 
 from rle.data import get_indexed_datasets
-from rle.utils import encode_pixel_data, encode_array, pixel_data, pixel_array
+from rle.utils import (
+    decode_pixel_data,
+    encode_pixel_data,
+    encode_array,
+    pixel_data,
+    pixel_array,
+)
+from rle.rle import pack_bits, unpack_bits
 
 
 INDEX_LEE = get_indexed_datasets("1.2.840.10008.1.2.1")
 
 
+class TestDecodePixelData:
+    """Tests for decode_pixel_data()"""
+
+    def test_u8_1s_ba1(self):
+        """Tests bits allocated 1"""
+        header = b"\x01\x00\x00\x00\x40\x00\x00\x00"
+        header += (64 - len(header)) * b"\x00"
+        # 0 0 0 0 0 1 0 1 0 1 1 0 1 1 1 1
+        data = b"\xFC\x00\x07\x01\x00\x01\x00\x01\x01\x00\x01\xFD\x01\x00"
+        src = header + data
+        opts = {
+            "rows": 1,
+            "columns": 16,
+            "bits_allocated": 1,
+        }
+
+        frame = decode_pixel_data(src, version=2, **opts)
+        assert frame == (
+            b"\x00\x00\x00\x00\x00\x01\x00\x01\x00\x01\x01\x00\x01\x01\x01\x01"
+        )
+        opts["pack_bits"] = True
+        frame = decode_pixel_data(src, version=2, **opts)
+        assert frame == b"\xA0\xF6"
+
+
 @pytest.mark.skipif(not HAVE_PYDICOM, reason="no pydicom")
 class TestEncodeArray:
     """Tests for utils.encode_array()."""
@@ -234,6 +268,42 @@ def test_too_many_segments_raises(self):
         with pytest.raises(ValueError, match=msg):
             encode_pixel_data(b"", **kwargs)
 
+    def test_u8_1s_ba1(self):
+        """Tests bits allocated 1"""
+        opts = {
+            "rows": 1,
+            "columns": 16,
+            "bits_allocated": 1,
+            "samples_per_pixel": 1,
+        }
+        # 0 0 0 0 0 1 0 1 0 1 1 0 1 1 1 1
+        enc = encode_pixel_data(b"\xA0\xF6", **opts)
+
+        header = b"\x01\x00\x00\x00\x40\x00\x00\x00"
+        header += (64 - len(header)) * b"\x00"
+        data = b"\xFC\x00\x03\x01\x00\x01\x00\xFF\x01\x00\x00\xFD\x01\x00"
+        assert enc == header + data
+        assert decode_pixel_data(enc, version=2, **opts) == (
+            b"\x00\x00\x00\x00\x00\x01\x00\x01\x00\x01\x01\x00\x01\x01\x01\x01"
+        )
+
+        opts = {
+            "rows": 1,
+            "columns": 12,
+            "bits_allocated": 1,
+            "samples_per_pixel": 1,
+        }
+        # 0 0 0 0 0 1 0 1 0 1 1 0
+        enc = encode_pixel_data(b"\xA0\xF6", **opts)
+
+        header = b"\x01\x00\x00\x00\x40\x00\x00\x00"
+        header += (64 - len(header)) * b"\x00"
+        data = b"\xFC\x00\x03\x01\x00\x01\x00\xFF\x01\x00\x00\x00"
+        assert enc == header + data
+        assert decode_pixel_data(enc, version=2, **opts) == (
+            b"\x00\x00\x00\x00\x00\x01\x00\x01\x00\x01\x01\x00"
+        )
+
 
 @pytest.mark.skipif(not HAVE_PYDICOM, reason="no pydicom")
 class TestPixelData:
@@ -249,3 +319,236 @@ def test_pixel_data(self):
         ds.PixelData = data
 
         assert np.array_equal(ref, pixel_array(ds))
+
+
+REFERENCE_PACK_UNPACK = [
+    # src, little, big
+    (b"", [], []),
+    (b"\x00", [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0]),
+    (b"\x01", [1, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1]),
+    (b"\x02", [0, 1, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 1, 0]),
+    (b"\x04", [0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0]),
+    (b"\x08", [0, 0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 0, 1, 0, 0, 0]),
+    (b"\x10", [0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0, 0]),
+    (b"\x20", [0, 0, 0, 0, 0, 1, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0]),
+    (b"\x40", [0, 0, 0, 0, 0, 0, 1, 0], [0, 1, 0, 0, 0, 0, 0, 0]),
+    (b"\x80", [0, 0, 0, 0, 0, 0, 0, 1], [1, 0, 0, 0, 0, 0, 0, 0]),
+    (b"\xaa", [0, 1, 0, 1, 0, 1, 0, 1], [1, 0, 1, 0, 1, 0, 1, 0]),
+    (b"\xf0", [0, 0, 0, 0, 1, 1, 1, 1], [1, 1, 1, 1, 0, 0, 0, 0]),
+    (b"\x0f", [1, 1, 1, 1, 0, 0, 0, 0], [0, 0, 0, 0, 1, 1, 1, 1]),
+    (b"\xff", [1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1]),
+    (
+        b"\x00\x00",
+        #| 1st byte              | 2nd byte
+        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+    ),
+    (
+        b"\x00\x01",
+        #| 1st byte              | 2nd byte
+        [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
+        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]
+        ),
+    (
+        b"\x00\x80",
+        #| 1st byte              | 2nd byte
+        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
+        [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
+        ),
+    (
+        b"\x00\xff",
+        #| 1st byte              | 2nd byte
+        [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1],
+        [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1],
+        ),
+    (
+        b"\x01\x80",
+        #| 1st byte              | 2nd byte
+        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
+        [0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0],
+        ),
+    (
+        b"\x80\x80",
+        #| 1st byte              | 2nd byte
+        [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1],
+        [1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
+        ),
+    (
+        b"\xff\x80",
+        #| 1st byte              | 2nd byte
+        [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1],
+        [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
+        ),
+]
+
+
+class TestUnpackBits:
+    """Tests for unpack_bits()."""
+
+    @pytest.mark.parametrize("src, little, big", REFERENCE_PACK_UNPACK)
+    def test_unpack_bytes(self, src, little, big):
+        """Test unpacking data without numpy."""
+        as_bytes = pack(f"{len(little)}B", *little)
+        assert unpack_bits(src, 0, "<") == as_bytes
+        assert unpack_bits(src, 32, "<") == as_bytes
+        as_bytes = pack(f"{len(big)}B", *big)
+        assert unpack_bits(src, 0, ">") == as_bytes
+        assert unpack_bits(src, 32, ">") == as_bytes
+
+    def test_count_little(self):
+        """Test the `count` parameter for little endian unpacking."""
+        assert unpack_bits(b"\x00", 1, "<") == b"\x00"
+        assert unpack_bits(b"\xff", 1, "<") == b"\x01"
+        assert unpack_bits(b"\xff", 2, "<") == b"\x01" * 2
+        assert unpack_bits(b"\xff", 3, "<") == b"\x01" * 3
+        assert unpack_bits(b"\xff", 4, "<") == b"\x01" * 4
+        assert unpack_bits(b"\xff", 5, "<") == b"\x01" * 5
+        assert unpack_bits(b"\xff", 6, "<") == b"\x01" * 6
+        assert unpack_bits(b"\xff", 7, "<") == b"\x01" * 7
+        assert unpack_bits(b"\xff", 8, "<") == b"\x01" * 8
+        assert unpack_bits(b"\xff\xAA", 9, "<") == b"\x01" * 8 + b"\x00"
+        assert unpack_bits(b"\xff\xAA", 10, "<") == b"\x01" * 8 + b"\x00\x01"
+        assert unpack_bits(b"\xff\xAA", 11, "<") == b"\x01" * 8 + b"\x00\x01\x00"
+        assert unpack_bits(b"\xff\xAA", 12, "<") == b"\x01" * 8 + b"\x00\x01" * 2
+        assert unpack_bits(b"\xff\xAA", 13, "<") == b"\x01" * 8 + b"\x00\x01" * 2 + b"\x00"
+        assert unpack_bits(b"\xff\xAA", 14, "<") == b"\x01" * 8 + b"\x00\x01" * 3
+        assert unpack_bits(b"\xff\xAA", 15, "<") == b"\x01" * 8 + b"\x00\x01" * 3 + b"\x00"
+        assert unpack_bits(b"\xff\xAA", 16, "<") == b"\x01" * 8 + b"\x00\x01" * 4
+
+    def test_count_big(self):
+        """Test the `count` parameter for big endian unpacking."""
+        assert unpack_bits(b"\x00", 1, ">") == b"\x00"
+        assert unpack_bits(b"\xff", 1, ">") == b"\x01"
+        assert unpack_bits(b"\xff", 2, ">") == b"\x01" * 2
+        assert unpack_bits(b"\xff", 3, ">") == b"\x01" * 3
+        assert unpack_bits(b"\xff", 4, ">") == b"\x01" * 4
+        assert unpack_bits(b"\xff", 5, ">") == b"\x01" * 5
+        assert unpack_bits(b"\xff", 6, ">") == b"\x01" * 6
+        assert unpack_bits(b"\xff", 7, ">") == b"\x01" * 7
+        assert unpack_bits(b"\xff", 8, ">") == b"\x01" * 8
+        assert unpack_bits(b"\xff\xAA", 9, ">") == b"\x01" * 8 + b"\x01"
+        assert unpack_bits(b"\xff\xAA", 10, ">") == b"\x01" * 8 + b"\x01\x00"
+        assert unpack_bits(b"\xff\xAA", 11, ">") == b"\x01" * 8 + b"\x01\x00\x01"
+        assert unpack_bits(b"\xff\xAA", 12, ">") == b"\x01" * 8 + b"\x01\x00" * 2
+        assert unpack_bits(b"\xff\xAA", 13, ">") == b"\x01" * 8 + b"\x01\x00" * 2 + b"\x01"
+        assert unpack_bits(b"\xff\xAA", 14, ">") == b"\x01" * 8 + b"\x01\x00" * 3
+        assert unpack_bits(b"\xff\xAA", 15, ">") == b"\x01" * 8 + b"\x01\x00" * 3 + b"\x01"
+        assert unpack_bits(b"\xff\xAA", 16, ">") == b"\x01" * 8 + b"\x01\x00" * 4
+
+    @pytest.mark.parametrize("src, little, big", REFERENCE_PACK_UNPACK)
+    def test_unpack_bytearray(self, src, little, big):
+        """Test unpacking data without numpy."""
+        as_bytes = pack(f"{len(little)}B", *little)
+        assert unpack_bits(bytearray(src), 0, "<") == as_bytes
+        as_bytes = pack(f"{len(big)}B", *big)
+        assert unpack_bits(bytearray(src), 0, ">") == as_bytes
+
+
+REFERENCE_PACK_PARTIAL_LITTLE = [
+    #              | 1st byte              | 2nd byte
+    (b"\x00\x40", [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]),  # 15-bits
+    (b"\x00\x20", [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]),
+    (b"\x00\x10", [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]),
+    (b"\x00\x08", [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]),
+    (b"\x00\x04", [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]),
+    (b"\x00\x02", [0, 0, 0, 0, 0, 0, 0, 0, 0, 1]),
+    (b"\x00\x01", [0, 0, 0, 0, 0, 0, 0, 0, 1]),  # 9-bits
+    (b"\x80", [0, 0, 0, 0, 0, 0, 0, 1]),  # 8-bits
+    (b"\x40", [0, 0, 0, 0, 0, 0, 1]),
+    (b"\x20", [0, 0, 0, 0, 0, 1]),
+    (b"\x10", [0, 0, 0, 0, 1]),
+    (b"\x08", [0, 0, 0, 1]),
+    (b"\x04", [0, 0, 1]),
+    (b"\x02", [0, 1]),
+    (b"\x01", [1]),
+    (b"", []),
+]
+REFERENCE_PACK_PARTIAL_BIG = [
+    #              | 1st byte              | 2nd byte
+    (b"\x00\x02", [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]),  # 15-bits
+    (b"\x00\x04", [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]),
+    (b"\x00\x08", [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]),
+    (b"\x00\x10", [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]),
+    (b"\x00\x20", [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]),
+    (b"\x00\x40", [0, 0, 0, 0, 0, 0, 0, 0, 0, 1]),
+    (b"\x00\x80", [0, 0, 0, 0, 0, 0, 0, 0, 1]),  # 9-bits
+    (b"\x01", [0, 0, 0, 0, 0, 0, 0, 1]),  # 8-bits
+    (b"\x02", [0, 0, 0, 0, 0, 0, 1]),
+    (b"\x04", [0, 0, 0, 0, 0, 1]),
+    (b"\x08", [0, 0, 0, 0, 1]),
+    (b"\x10", [0, 0, 0, 1]),
+    (b"\x20", [0, 0, 1]),
+    (b"\x40", [0, 1]),
+    (b"\x80", [1]),
+    (b"", []),
+]
+
+
+class TestPackBits:
+    """Tests for pack_bits()."""
+
+    @pytest.mark.parametrize("output, little, big", REFERENCE_PACK_UNPACK)
+    def test_pack_bytes(self, output, little, big):
+        """Test packing data."""
+        assert output == pack_bits(bytes(little), "<")
+        assert output == pack_bits(bytes(big), ">")
+
+    @pytest.mark.parametrize("output, little, big", REFERENCE_PACK_UNPACK)
+    def test_pack_bytearray(self, output, little, big):
+        """Test packing data."""
+        assert output == pack_bits(bytearray(little), "<")
+        assert output == pack_bits(bytearray(big), ">")
+
+    def test_non_binary_input(self):
+        """Test non-binary input raises exception."""
+        msg = r"Only binary input \(containing zeros or ones\) can be packed"
+        with pytest.raises(ValueError, match=msg):
+            pack_bits(b"\x00\x00\x02\x00\x00\x00\x00\x00", "<")
+
+    def test_bytes_input(self):
+        """Repeat above test with bytes input."""
+        # fmt: off
+        src = bytes(
+            [
+                0, 0, 0, 0, 0, 0, 0, 0,
+                1, 0, 1, 0, 1, 0, 1, 0,
+                1, 1, 1, 1, 1, 1, 1, 1,
+            ]
+        )
+        # fmt: on
+        assert b"\x00\x55\xff" == pack_bits(src, "<")
+        assert b"\x00\xAA\xff" == pack_bits(src, ">")
+
+    def test_bytearry_input(self):
+        """Repeat above test with bytearray input."""
+        # fmt: off
+        src = bytearray(
+            [
+                0, 0, 0, 0, 0, 0, 0, 0,
+                1, 0, 1, 0, 1, 0, 1, 0,
+                1, 1, 1, 1, 1, 1, 1, 1,
+            ]
+        )
+        # fmt: on
+        assert b"\x00\x55\xff" == pack_bits(src, "<")
+        assert b"\x00\xAA\xff" == pack_bits(src, ">")
+
+    @pytest.mark.parametrize("output, src", REFERENCE_PACK_PARTIAL_LITTLE)
+    def test_pack_partial_bytes(self, src, output):
+        """Test packing data that isn't a full byte long."""
+        assert output == pack_bits(bytes(src), "<")
+
+    @pytest.mark.parametrize("output, src", REFERENCE_PACK_PARTIAL_LITTLE)
+    def test_pack_partial_bytearray(self, src, output):
+        """Test packing data that isn't a full byte long."""
+        assert output == pack_bits(bytearray(src), "<")
+
+    @pytest.mark.parametrize("output, src", REFERENCE_PACK_PARTIAL_BIG)
+    def test_pack_partial_bytes_big(self, src, output):
+        """Test packing data that isn't a full byte long."""
+        assert output == pack_bits(bytes(src), ">")
+
+    @pytest.mark.parametrize("output, src", REFERENCE_PACK_PARTIAL_BIG)
+    def test_pack_partial_bytearray_big(self, src, output):
+        """Test packing data that isn't a full byte long."""
+        assert output == pack_bits(bytearray(src), ">")
diff --git a/rle/utils.py b/rle/utils.py
index 376c084..d15c277 100644
--- a/rle/utils.py
+++ b/rle/utils.py
@@ -1,12 +1,19 @@
 """Utility functions."""
 
 import enum
+import math
 import sys
-from typing import Iterator, Optional, Any, TYPE_CHECKING, cast, Union
+from typing import Iterator, Any, TYPE_CHECKING, cast
 
 import numpy as np
 
-from rle.rle import decode_frame, decode_segment, encode_frame, encode_segment
+from rle.rle import (
+    decode_frame,
+    decode_segment,
+    encode_frame,
+    encode_segment,
+)
+from rle.rle import pack_bits as _pack_bits, unpack_bits as _unpack_bits
 
 
 if TYPE_CHECKING:  # pragma: no cover
@@ -20,11 +27,16 @@ class Version(enum.IntEnum):
 
 def decode_pixel_data(
     src: bytes,
-    ds: Optional["Dataset"] = None,
+    ds: "Dataset | None" = None,
     version: int = Version.v1,
     **kwargs: Any,
-) -> Union[np.ndarray, bytearray]:
-    """Return the decoded RLE Lossless data as a :class:`numpy.ndarray`.
+) -> np.ndarray | bytearray:
+    """Return the decoded RLE Lossless data as a :class:`numpy.ndarray` or
+    :class:`bytearray`.
+
+    .. versionchanged:: 2.2
+
+        Added the ``"pack_bits"`` decoding option and support for *Bits Allocated* 1.
 
     Parameters
     ----------
@@ -33,7 +45,7 @@ def decode_pixel_data(
     ds : pydicom.dataset.Dataset, optional
         A :class:`~pydicom.dataset.Dataset` containing the group ``0x0028``
         elements corresponding to the image frame. If not used then `kwargs`
-        must be supplied. Only used with version ``1``.
+        must be supplied. Only used with `version` ``1``.
     version : int, optional
 
         * If ``1`` (default) then return the image data as an :class:`numpy.ndarray`
@@ -41,22 +53,26 @@ def decode_pixel_data(
     **kwargs
         Required keys if `ds` is not supplied or if version is ``2``:
 
-        * ``"rows"``: :class:`int` - the number of rows in the decoded image
+        * ``"rows"``: :class:`int` - the number of rows in the decoded image.
         * ``"columns"``: :class:`int` - the number of columns in the decoded
-          image
+          image.
         * ``"bits_allocated"``: :class:`int` - the number of bits allocated
-          to each pixel
+          to each pixel.
 
         Current decoding options are:
 
-        * ``{'byteorder': str}`` specify the byte ordering for the decoded data
-        when more than 8 bits per pixel are used, should be '<' for little
-        endian ordering (default) or '>' for big-endian ordering.
+        * ``"byteorder"``: :class:`str` - specify the byte ordering for the decoded
+          data when more than 8 bits per pixel are used, should be ``'<'`` for little
+          endian ordering (default) or ``'>'`` for big-endian ordering.
+        * ``"pack_bits"``: :class:`bool` - (`version` 2 only) if ``True`` and
+          ``"bits_allocated"`` is ``1`` then return the decoded data in its packed
+          form (8 pixels per byte), otherwise return the decoded data in its unpacked
+          form (1 pixel per byte). Default ``False``.
 
     Returns
     -------
     bytearray | numpy.ndarray
-        The image data as either a bytearray or ndarray.
+        The image data as either a bytearray (`version` 2) or ndarray (`version` 1).
 
     Raises
     ------
@@ -93,12 +109,18 @@ def decode_pixel_data(
     rows = cast(int, kwargs.get("rows"))
     bits_allocated = cast(int, kwargs.get("bits_allocated"))
     byteorder = kwargs.get("byteorder", "<")
+    as_packed = kwargs.get("pack_bits", False)
+
+    frame: bytearray = decode_frame(src, rows * columns, bits_allocated, byteorder)
 
-    return cast(bytearray, decode_frame(src, rows * columns, bits_allocated, byteorder))
+    if bits_allocated != 1 or as_packed is False:
+        return frame
+
+    return _pack_bits(frame, "<")
 
 
 def encode_array(
-    arr: "np.ndarray", ds: Optional["Dataset"] = None, **kwargs: Any
+    arr: "np.ndarray", ds: "Dataset | None" = None, **kwargs: Any
 ) -> Iterator[bytes]:
     """Yield RLE encoded frames from `arr`.
 
@@ -157,8 +179,8 @@ def encode_array(
 
 def encode_pixel_data(
     src: bytes,
-    ds: Optional["Dataset"] = None,
-    byteorder: Optional[str] = None,
+    ds: "Dataset | None" = None,
+    byteorder: str | None = None,
     **kwargs: Any,
 ) -> bytes:
     """Return `src` encoded using the DICOM RLE (PackBits) algorithm.
@@ -178,7 +200,9 @@ def encode_pixel_data(
     Parameters
     ----------
     src : bytes
-        The data for a single image frame data to be RLE encoded.
+        The data for a single image frame data to be RLE encoded. For *Bits Allocated*
+        1 if `src` contains bit-packed data then it will be automatically unpacked
+        prior to encoding.
     ds : pydicom.dataset.Dataset, optional
         The dataset corresponding to `src` with matching values for *Rows*,
         *Columns*, *Samples per Pixel* and *Bits Allocated*. Required if
@@ -195,7 +219,7 @@ def encode_pixel_data(
         * ``'samples_per_pixel'``: :class:`int` the number of samples per pixel, either
           1 for monochrome or 3 for RGB or similar data.
         * ``'bits_allocated'``: :class:`int` the number of bits needed to contain each
-          pixel, either 1, 8, 16, 32 or 64.
+          pixel, must be 1, 8, 16, 32 or 64.
 
     Returns
     -------
@@ -214,34 +238,37 @@ def encode_pixel_data(
         spp = kwargs["samples_per_pixel"]
 
     # Validate input
-    if spp not in [1, 3]:
+    if spp not in (1, 3):
         msg = "(0028,0002) 'Samples per Pixel'" if ds else "'samples_per_pixel'"
         raise ValueError(f"{msg} must be 1 or 3")
 
-    if bpp not in [1, 8, 16, 32, 64]:
+    if bpp not in (1, 8, 16, 32, 64):
         msg = "(0028,0100) 'Bits Allocated'" if ds else "'bits_allocated'"
         raise ValueError(f"{msg} must be 1, 8, 16, 32 or 64")
 
-    if bpp / 8 * spp > 15:
+    if bpp != 1 and bpp / 8 * spp > 15:
         raise ValueError(
             "Unable to encode the data as the RLE format used by the DICOM "
             "Standard only allows a maximum of 15 segments"
         )
 
-    byteorder = "<" if bpp == 8 else byteorder
+    byteorder = "<" if bpp <= 8 else byteorder
     if byteorder not in ("<", ">"):
         raise ValueError(
             "A valid 'byteorder' is required when the number of bits per "
             "pixel is greater than 8"
         )
 
-    if len(src) != (r * c * bpp / 8 * spp):
+    src_length = len(src)
+    if bpp == 1 and src_length == math.ceil((r * c) / 8):
+        # src is bit packed, so unpack
+        src = _unpack_bits(src, r * c, "<")
+    elif src_length != (r * c * bpp / 8 * spp):
         raise ValueError("The length of the data doesn't match the image parameters")
 
     return cast(bytes, encode_frame(src, r, c, spp, bpp, byteorder))
 
 
-# TODO: unpack Bits Stored 1
 def generate_frames(ds: "Dataset", reshape: bool = True) -> Iterator[np.ndarray]:
     """Yield a *Pixel Data* frame from `ds` as an :class:`~numpy.ndarray`.
 
@@ -317,7 +344,6 @@ def generate_frames(ds: "Dataset", reshape: bool = True) -> Iterator[np.ndarray]
             yield arr.transpose(1, 2, 0)
 
 
-# TODO: unpack Bits Stored 1
 def pixel_array(ds: "Dataset") -> "np.ndarray":
     """Return the entire *Pixel Data* as an :class:`~numpy.ndarray`.
 
@@ -376,3 +402,57 @@ def pixel_data(arr: "np.ndarray", ds: "Dataset") -> bytes:
     from pydicom.encaps import encapsulate
 
     return encapsulate([ii for ii in encode_array(arr, ds)])
+
+
+def pack_bits(src: bytes | bytearray, bitorder: str = "<") -> bytearray:
+    """Bit pack `src`
+
+    .. versionadded: 2.2
+
+    Parameters
+    ----------
+    src : bytes | bytearray
+        The data to be bit-packed, should only contain zeros and ones.
+    bitorder : str, optional
+        The bit ordering to use for the packed data, should be '<' for little endian
+        ordering (default) or '>' for big-endian ordering. For example, if `src` is
+        ``b"\x00\x01\x00\x00"`` then ``0x02`` (``0b00000010``) will be returned for
+        little endian ordering and ``0x40`` (``0b01000000``) for big endian ordering.
+
+    Returns
+    -------
+    bytearray
+        The bit-packed data, one byte per 8 bytes of the original with padding bits
+        added if the length of `src` is not a multiple of 8.
+    """
+    return _pack_bits(src, bitorder)
+
+
+def unpack_bits(
+    src: bytes | bytearray, count: int | None = None, bitorder: str = "<"
+    ) -> bytearray:
+    """Bit unpack `src`
+
+    .. versionadded: 2.2
+
+    Parameters
+    ----------
+    src : bytes | bytearray
+        The data to be unpacked.
+    count : int | None, optional
+        If ``None`` (default) then return the entire unpacked `src`, otherwise return
+        the first `count` number of bytes from the unpacked `src`.
+    bitorder : str, optional
+        The bit ordering to used by the packed data, should be '<' for little endian
+        ordering (default) or '>' for big-endian ordering.
+
+    Returns
+    -------
+    bytearray
+        The unpacked data, 8 bytes per byte of `src`.
+    """
+    maximum_nr_bits = len(src) * 8
+    if count is None or not (0 < count <= maximum_nr_bits):
+        count = maximum_nr_bits
+
+    return _unpack_bits(src, count, bitorder)
diff --git a/src/lib.rs b/src/lib.rs
index aa04348..32088f9 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -21,10 +21,132 @@ fn rle(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_function(wrap_pyfunction!(encode_segment, m)?);
     m.add_function(wrap_pyfunction!(encode_frame, m)?);
 
+    m.add_function(wrap_pyfunction!(pack_bits, m)?);
+    m.add_function(wrap_pyfunction!(unpack_bits, m)?);
+
     Ok(())
 }
 
 
+// Utilities
+// ---------
+
+#[pyfunction]
+fn pack_bits<'py>(
+    src: Vec<u8>, bitorder: char, py: Python<'py>
+) -> PyResult<Bound<'py, PyByteArray>> {
+
+    match bitorder {
+        '>' | '<' => {},
+        _ => { return Err(PyValueError::new_err("'bitorder' must be '>' or '<'")) }
+    }
+
+    // Check values are in (0, 1)
+    if src.iter().max() > Some(&1u8) {
+        return Err(
+            PyValueError::new_err("Only binary input (containing zeros or ones) can be packed")
+        )
+    }
+
+    let mut dst: Vec<u8> = Vec::new();
+
+    if bitorder == '<' {
+        // Bits use little endian ordering
+        for chunk in src.chunks_exact(8) {
+            let mut packed = 0u8;
+            for idx in 0..8 {
+                packed |= chunk[idx] << idx;
+            }
+            dst.push(packed);
+        }
+    } else {
+        // Bits use big endian ordering
+        for chunk in src.chunks_exact(8) {
+            let mut packed = 0u8;
+            for idx in 0..8 {
+                packed |= chunk[idx] << (7 - idx);
+            }
+            dst.push(packed);
+        }
+    }
+
+    let remainder = src.len() % 8;
+    if remainder > 0 {
+        let mut last_byte = 0u8;
+        // ..., 0, 1, 0, 0 iterates as 0, 0, 1, 0
+        if bitorder == '<' {
+            for (idx, bit) in src.iter().rev().take(remainder).enumerate() {
+                // 0 0 0 0 0 0 1 0
+                last_byte |= bit << (remainder - idx - 1);
+            }
+        } else {
+            for (idx, bit) in src.iter().rev().take(remainder).enumerate() {
+                // 0 1 0 0 0 0 0 0
+                last_byte |= bit << (8 - remainder - idx);
+            }
+        }
+        dst.push(last_byte);
+    }
+
+    Ok(PyByteArray::new(py, &dst))
+}
+
+
+#[pyfunction]
+fn unpack_bits<'py>(
+    src: Vec<u8>, count: u128, bitorder: char, py: Python<'py>
+) -> PyResult<Bound<'py, PyByteArray>> {
+    match bitorder {
+        '>' | '<' => {},
+        _ => { return Err(PyValueError::new_err("'bitorder' must be '>' or '<'")) }
+    }
+
+    // The maximum value of `count` should be 2^65
+    let nr_bits: u128;
+    let nr_bytes = u128::try_from(src.len()).unwrap();
+    if count == 0 || count > nr_bytes * 8 {
+        nr_bits = nr_bytes * 8;
+    } else {
+        nr_bits = count;
+    }
+
+    let mut dst: Vec<u8> = Vec::new();
+    // Shouldn't be more than 2^64
+    let nr_whole_bytes = usize::try_from(nr_bits / 8).unwrap();
+    // Shouldn't be more than 7
+    let nr_remainder_bits = usize::try_from(nr_bits % 8).unwrap();
+
+    if bitorder == '<' {
+        // Unpack the whole bytes
+        for offset in  0..nr_whole_bytes {
+            for idx in 0..8 {
+                dst.push((src[offset] >> idx) & 1u8);
+            }
+        }
+        // Do the final (partial) byte, if required
+        if nr_remainder_bits != 0 {
+            for idx in 0..nr_remainder_bits {
+                dst.push((src[nr_whole_bytes] >> idx) & 1u8);
+            }
+        }
+    } else {
+        for offset in  0..nr_whole_bytes {
+            for idx in 0..8 {
+                dst.push((src[offset] >> (7 - idx)) & 1u8);
+            }
+        }
+        if nr_remainder_bits != 0 {
+            for idx in 0..nr_remainder_bits {
+                dst.push((src[nr_whole_bytes] >> (7 - idx)) & 1u8);
+            }
+        }
+
+    }
+
+    Ok(PyByteArray::new(py, &dst))
+}
+
+
 // RLE Decoding
 // ------------
 
@@ -147,6 +269,11 @@ fn _decode_frame(
     let err_invalid_nr_samples = Err(
         String::from("The (0028,0002) 'Samples per Pixel' must be 1 or 3").into()
     );
+    let err_invalid_nr_samples_ba1 = Err(
+        String::from(
+            "The (0028,0002) 'Samples per Pixel' must be 1 if (0028,0100) 'Bits Allocated' is 1"
+        ).into()
+    );
     let err_segment_length = Err(
         String::from(
             "The decoded segment length does not match the expected length"
@@ -157,12 +284,10 @@ fn _decode_frame(
     );
 
     // Check 'Bits Allocated' is 1 or a multiple of 8
-    let mut is_bit_packed = false;
-    let mut bytes_per_pixel: u8;  // Valid values are 1 | 2 | 4 | 8
+    let bytes_per_pixel: u8;  // Valid values are 1 | 2 | 4 | 8
     match bpp {
         0 => return err_invalid_bits_allocated,
         1 => {
-                is_bit_packed = true;
                 bytes_per_pixel = 1;
         },
         _ => match bpp % 8 {
@@ -224,9 +349,9 @@ fn _decode_frame(
     match spp {
         1 => {},
         3 => {
-            // Bit packed data must be 1 sample per pixel
+            // Bits allocated 1 must be 1 sample per pixel
             match bpp {
-                1 => { return err_invalid_nr_samples },
+                1 => { return err_invalid_nr_samples_ba1 },
                 _ => {}
             }
         },
@@ -254,79 +379,50 @@ fn _decode_frame(
     // TODO: handle unwrap
     let pps = usize::try_from(nr_pixels).unwrap();
     // Concatenate sample planes into a frame
-    if !is_bit_packed {
-        // Watch for overflow here; u32 * u32 -> u64
-        // Actual values are (u16 * u16) * u8
-        let expected_length = usize::try_from(
-            nr_pixels * nr_segments
-        ).unwrap();
-
-        // Pre-allocate a vector for the decoded frame
-        let mut frame = vec![0u8; expected_length];
-
-        for sample in 0..spp {  // 0 or (0, 1, 2)
-            // Sample offset
-            let so = usize::from(sample * bytes_per_pixel) * pps;
-
-            // Interleave the segments into a sample plane
-            for byte_offset in 0..bytes_per_pixel {  // 0, [1, 2, 3, ..., 7]
-                // idx should be in range [0, 15]
-                let idx: usize;
-                if byteorder == '>' { // big-endian
-                    // e.g. 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
-                    idx = usize::from(sample * bytes_per_pixel + byte_offset);
-                } else { // little-endian
-                    // e.g. 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8
-                    idx = usize::from(
-                        bytes_per_pixel - byte_offset + bytes_per_pixel * sample
-                    ) - 1;
-                }
-
-                // offsets[idx] is u32 -> usize not guaranteed
-                let start = usize::try_from(offsets[idx]).unwrap();
-                let end = usize::try_from(offsets[idx + 1]).unwrap();
-
-                // Decode the segment into the frame
-                let len = _decode_segment_into_frame(
-                    <&[u8]>::try_from(&src[start..end]).unwrap(),
-                    &mut frame,
-                    usize::from(bytes_per_pixel),
-                    usize::from(byte_offset) + so
-                )?;
-
-                if len != pps { return err_segment_length }
+    // Watch for overflow here; u32 * u32 -> u64
+    // Actual values are (u16 * u16) * u8
+    let expected_length = usize::try_from(
+        nr_pixels * u32::from(nr_segments)
+    ).unwrap();
+
+    // Pre-allocate a vector for the decoded frame
+    let mut frame = vec![0u8; expected_length];
+
+    for sample in 0..spp {  // 0 or (0, 1, 2)
+        // Sample offset
+        let so = usize::from(sample * bytes_per_pixel) * pps;
+
+        // Interleave the segments into a sample plane
+        for byte_offset in 0..bytes_per_pixel {  // 0, [1, 2, 3, ..., 7]
+            // idx should be in range [0, 15]
+            let idx: usize;
+            if byteorder == '>' { // big-endian
+                // e.g. 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
+                idx = usize::from(sample * bytes_per_pixel + byte_offset);
+            } else { // little-endian
+                // e.g. 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8
+                idx = usize::from(
+                    bytes_per_pixel - byte_offset + bytes_per_pixel * sample
+                ) - 1;
             }
-        }
 
-        return Ok(frame)
-    }
+            // offsets[idx] is u32 -> usize not guaranteed
+            let start = usize::try_from(offsets[idx]).unwrap();
+            let end = usize::try_from(offsets[idx + 1]).unwrap();
 
-    /* Bit-packed data
-    ------------------
-    For bit-packed data (i.e. a *Bits Allocated* of 1), we decode the RLE encoded data but
-    leave the bit-packing in place. This means the length of each decoded segment is not
-    equal to the number of pixels but instead the number of pixels / 8, rounded up to the
-    nearest whole integer, with the difference being accounted for by 0b0 padding bits.
-    */
+            // Decode the segment into the frame
+            let len = _decode_segment_into_frame(
+                <&[u8]>::try_from(&src[start..end]).unwrap(),
+                &mut frame,
+                usize::from(bytes_per_pixel),
+                usize::from(byte_offset) + so
+            )?;
 
-    // The expected number of whole bytes per segment after decoding
-    let mut bytes_per_segment: usize;
-    match nr_pixels % 8 {
-        0 => { bytes_per_segment = pps / 8; },
-        _ => { bytes_per_segment = (pps + (8 - pps % 8)) / 8; },
+            if len != pps { return err_segment_length }
+        }
     }
 
-    // For bit-packed data we can only have 1 segment.
-    let start = usize::try_from(offsets[0]).unwrap();
-    let end = usize::try_from(offsets[1]).unwrap();
-    let segment = _decode_bit_packed_segment(
-        <&[u8]>::try_from(&src[start..end]).unwrap(),
-        bytes_per_segment,
-    )?;
-
-    if segment.len() != bytes_per_segment) { return err_segment_length }
-
-    return Ok(segment)
+    return Ok(frame)
 }
 
 
@@ -581,7 +677,7 @@ fn _decode_segment(src: &[u8], dst: &mut Vec<u8>) -> Result<(), Box<dyn Error>>
 
 #[pyfunction]
 fn encode_frame<'py>(
-    src: &[u8], rows: u16, cols: u16, spp: u8, bpp: u8, byteorder: char, py: Python<'py>
+    src: Vec<u8>, rows: u16, cols: u16, spp: u8, bpp: u8, byteorder: char, py: Python<'py>
 ) -> PyResult<Bound<'py, PyBytes>> {
     /* Return RLE encoded `src` as bytes.
 
@@ -616,7 +712,7 @@ fn encode_frame<'py>(
 
 
 fn _encode_frame(
-    src: &[u8], dst: &mut Vec<u8>, rows: u16, cols: u16, spp: u8, bpp: u8, byteorder: char
+    src: Vec<u8>, dst: &mut Vec<u8>, rows: u16, cols: u16, spp: u8, bpp: u8, byteorder: char
 ) -> Result<(), Box<dyn Error>> {
     /*
 
@@ -645,6 +741,11 @@ fn _encode_frame(
     let err_invalid_nr_samples = Err(
         String::from("The (0028,0002) 'Samples per Pixel' must be 1 or 3").into()
     );
+    let err_invalid_nr_samples_ba1 = Err(
+        String::from(
+            "The (0028,0002) 'Samples per Pixel' must be 1 if (0028,0100) 'Bits Allocated' is 1"
+        ).into()
+    );
     let err_invalid_bits_allocated = Err(
         String::from(
             "The (0028,0100) 'Bits Allocated' value must be 1, 8, 16, 32 or 64"
@@ -671,24 +772,18 @@ fn _encode_frame(
     // Check 'Samples per Pixel' is either 1 or 3
     // Check 'Bits Allocated' is 1 or a multiple of 8
     // Check 'Samples per Pixel' is 1 if 'Bits Allocated' is 1
-    let mut is_bit_packed = false;
-    let mut bytes_per_pixel: u8;
+    let bytes_per_pixel: u8;
     match spp {
         1 => {
             match bpp {
-                1 => {
-                    is_bit_packed = true;
-                    bytes_per_pixel = 1;
-                },
-                8 | 16 | 32 | 64 => {
-                    bytes_per_pixel = bpp / 8;
-                },
+                1 => { bytes_per_pixel = 1; },
+                8 | 16 | 32 | 64 => { bytes_per_pixel = bpp / 8; },
                 _ => { return err_invalid_bits_allocated }
             }
         }
         3 => {
             match bpp {
-                1 => { return err_invalid_nr_samples },
+                1 => { return err_invalid_nr_samples_ba1 },
                 8 | 16 | 32 | 64 => { bytes_per_pixel = bpp / 8; },
                 _ => { return err_invalid_bits_allocated }
             }
@@ -713,16 +808,9 @@ fn _encode_frame(
     let c = usize::try_from(cols).unwrap();
 
     let total_pixels = r * c * usize::from(spp);
-    if !is_bit_packed {
-        let total_length = total_pixels * usize::from(bytes_per_pixel);
-        if src.len() != total_length { return err_invalid_parameters }
-    } else {
-        let mut total_length: usize;
-        match total_pixels % 8 {
-            0 => { total_length = total_pixels / 8; },
-            _ => { total_length = (total_pixels + (8 - total_pixels % 8)) / 8; }
-        }
-        if src.len() != total_length { return err_invalid_parameters }
+    let total_length = total_pixels * usize::from(bytes_per_pixel);
+    if src.len() != total_length {
+        return err_invalid_parameters
     }
 
     let nr_segments: u8 = spp * bytes_per_pixel;
@@ -756,21 +844,13 @@ fn _encode_frame(
         }
 
         // Encode! Note the offset start of the `src` iter
-        if !is_bit_packed {
-            let segment: Vec<u8> = src[start_indices[idx]..]
-                .into_iter()
-                .step_by(usize::from(spp * bytes_per_pixel))
-                .cloned()
-                .collect();
-
-            _encode_segment_from_vector(segment, dst, cols)?;
-        } else {
-            // Should only ever be 1 sample per pixel -> 1 segment
-            // cols is wrong here, should be / 8
-            // Also the DICOM Standard says each row should be encoded separately,
-            //  what do we do if the number of columns isn't divisble by 8?
-            _encode_segment_from_vector(src, dst, cols)?;
-        }
+        let segment: Vec<u8> = src[start_indices[idx]..]
+            .into_iter()
+            .step_by(usize::from(spp * bytes_per_pixel))
+            .cloned()
+            .collect();
+
+        _encode_segment_from_vector(segment, dst, cols)?;
     }
 
     Ok(())

From f4fa1dbf9ccbb2abf3a0346df5d694ebafa4a618 Mon Sep 17 00:00:00 2001
From: scaramallion <scaramallion@users.noreply.github.com>
Date: Mon, 8 Sep 2025 08:47:50 +1000
Subject: [PATCH 6/9] Cleanup and fix 3.14 tests

---
 .github/workflows/pytest-builds.yml | 12 ++++-
 src/lib.rs                          | 70 -----------------------------
 2 files changed, 11 insertions(+), 71 deletions(-)

diff --git a/.github/workflows/pytest-builds.yml b/.github/workflows/pytest-builds.yml
index 4022531..53fb4dc 100644
--- a/.github/workflows/pytest-builds.yml
+++ b/.github/workflows/pytest-builds.yml
@@ -30,13 +30,23 @@ jobs:
       run:
         curl https://sh.rustup.rs -sSf | sh -s -- -y
 
-    - name: Install dependencies
+    - name: Install dependencies (!= 3.14)
+      if: ${{ matrix.python-version }} != '3.14'
       run: |
         pip install --upgrade pip
         pip install setuptools-rust pytest pydicom coverage pytest-cov
         pip install git+https://github.com/pydicom/pylibjpeg-data
         pip install -e .
 
+    - name: Install dependencies (== 3.14)
+      if: ${{ matrix.python-version }} == '3.14'
+      run: |
+        pip install --upgrade pip
+        pip install setuptools-rust pytest coverage pytest-cov
+        pip install git+https://github.com/pydicom/pylibjpeg-data
+        pip install git+https://github.com/pydicom/pydicom
+        pip install -e .
+
     - name: Test with pytest
       env:
         PYTHON_VERSION: ${{ matrix.python-version }}
diff --git a/src/lib.rs b/src/lib.rs
index 32088f9..435a6a0 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,8 +2,6 @@
 use std::convert::TryFrom;
 use std::error::Error;
 
-// use bitvec::prelude::*;
-
 use pyo3::prelude::*;
 use pyo3::wrap_pyfunction;
 use pyo3::types::{PyBytes, PyByteArray};
@@ -426,74 +424,6 @@ fn _decode_frame(
 }
 
 
-fn _decode_bit_packed_segment(
-    src: &[u8], segment_length: usize
-) -> Result<Vec<u8>, Box<dyn Error>> {
-    /*
-
-    The returned data is guaranteed to be no more than segment_length in size.
-    */
-    let eod = src.len() - 1;
-    let mut pos = 0;
-    let mut header_byte: usize;
-    let mut dst: Vec<u8> = Vec::new();
-    let mut op_len: usize;
-    let mut idx: usize = 0;  // number of bytes decoded
-
-    let err_eod = Err(
-        String::from(
-            "The end of the data was reached before the segment was completely decoded"
-        ).into()
-    );
-
-    loop {
-        // `header_byte` is equivalent to N in the DICOM Standard
-        // usize is at least u8
-        header_byte = usize::from(src[pos]);
-        pos += 1;
-        if header_byte > 128 {
-            // Extend by copying the next byte (-N + 1) times
-            // however since using uint8 instead of int8 this will be
-            // (256 - N + 1) times
-            op_len = 257 - header_byte;
-
-            // Check we have enough encoded data
-            if pos > eod { return err_eod }
-
-            // Check segment for excess padding
-            if (idx + op_len) > segment_length {
-                dst.extend(vec![src[pos]; segment_length - idx]);
-
-                return Ok(dst)
-            }
-
-            dst.extend(vec![src[pos]; op_len]);
-            idx += op_len;
-            pos += 1;
-        } else if header_byte < 128 {
-            // Extend by literally copying the next (N + 1) bytes
-            op_len = header_byte + 1;
-
-            // Check we have enough encoded data
-            if (pos + header_byte) > eod { return err_eod }
-
-            // Check segment for excess padding
-            if (idx + op_len) > segment_length {
-                dst.extend(&src[pos..pos + segment_length - idx]);
-
-                return Ok(dst)
-            }
-
-            dst.extend(&src[pos..pos + op_len]);
-            pos += header_byte + 1;
-            idx += op_len;
-        } // header_byte == 128 is noop
-
-        if pos >= eod { return Ok(dst) }
-    }
-}
-
-
 fn _decode_segment_into_frame(
     src: &[u8], dst: &mut Vec<u8>, bpp: usize, initial_offset: usize
 ) -> Result<usize, Box<dyn Error>> {

From 027a75464b120f3335538025b1ebcfefe22a3697 Mon Sep 17 00:00:00 2001
From: scaramallion <scaramallion@users.noreply.github.com>
Date: Mon, 8 Sep 2025 08:50:42 +1000
Subject: [PATCH 7/9] Fix tests check

---
 .github/workflows/pytest-builds.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/pytest-builds.yml b/.github/workflows/pytest-builds.yml
index 53fb4dc..90d1e22 100644
--- a/.github/workflows/pytest-builds.yml
+++ b/.github/workflows/pytest-builds.yml
@@ -31,7 +31,7 @@ jobs:
         curl https://sh.rustup.rs -sSf | sh -s -- -y
 
     - name: Install dependencies (!= 3.14)
-      if: ${{ matrix.python-version }} != '3.14'
+      if: ${{ matrix.python-version != '3.14' }}
       run: |
         pip install --upgrade pip
         pip install setuptools-rust pytest pydicom coverage pytest-cov
@@ -39,7 +39,7 @@ jobs:
         pip install -e .
 
     - name: Install dependencies (== 3.14)
-      if: ${{ matrix.python-version }} == '3.14'
+      if: ${{ matrix.python-version == '3.14' }}
       run: |
         pip install --upgrade pip
         pip install setuptools-rust pytest coverage pytest-cov

From 0df89e219ac828886ec2b9be18b11b09013a6c4b Mon Sep 17 00:00:00 2001
From: scaramallion <scaramallion@users.noreply.github.com>
Date: Mon, 8 Sep 2025 08:58:27 +1000
Subject: [PATCH 8/9] Add tests for missing coverage

---
 rle/tests/test_utils.py | 107 ++++++++++++++++++++++++++++------------
 rle/utils.py            |   2 +-
 2 files changed, 76 insertions(+), 33 deletions(-)

diff --git a/rle/tests/test_utils.py b/rle/tests/test_utils.py
index 3fe248e..371d865 100644
--- a/rle/tests/test_utils.py
+++ b/rle/tests/test_utils.py
@@ -25,8 +25,10 @@
     encode_array,
     pixel_data,
     pixel_array,
+    pack_bits,
+    unpack_bits,
 )
-from rle.rle import pack_bits, unpack_bits
+from rle.rle import pack_bits as _pack_bits, unpack_bits as _unpack_bits
 
 
 INDEX_LEE = get_indexed_datasets("1.2.840.10008.1.2.1")
@@ -383,10 +385,20 @@ def test_pixel_data(self):
 
 
 class TestUnpackBits:
-    """Tests for unpack_bits()."""
+    """Tests for _unpack_bits()."""
 
     @pytest.mark.parametrize("src, little, big", REFERENCE_PACK_UNPACK)
     def test_unpack_bytes(self, src, little, big):
+        """Test unpacking data without numpy."""
+        as_bytes = pack(f"{len(little)}B", *little)
+        assert _unpack_bits(src, 0, "<") == as_bytes
+        assert _unpack_bits(src, 32, "<") == as_bytes
+        as_bytes = pack(f"{len(big)}B", *big)
+        assert _unpack_bits(src, 0, ">") == as_bytes
+        assert _unpack_bits(src, 32, ">") == as_bytes
+
+    @pytest.mark.parametrize("src, little, big", REFERENCE_PACK_UNPACK)
+    def test_unpack_bytes_util(self, src, little, big):
         """Test unpacking data without numpy."""
         as_bytes = pack(f"{len(little)}B", *little)
         assert unpack_bits(src, 0, "<") == as_bytes
@@ -397,6 +409,31 @@ def test_unpack_bytes(self, src, little, big):
 
     def test_count_little(self):
         """Test the `count` parameter for little endian unpacking."""
+        assert _unpack_bits(b"\x00", 1, "<") == b"\x00"
+        assert _unpack_bits(b"\xff", 1, "<") == b"\x01"
+        assert _unpack_bits(b"\xff", 2, "<") == b"\x01" * 2
+        assert _unpack_bits(b"\xff", 3, "<") == b"\x01" * 3
+        assert _unpack_bits(b"\xff", 4, "<") == b"\x01" * 4
+        assert _unpack_bits(b"\xff", 5, "<") == b"\x01" * 5
+        assert _unpack_bits(b"\xff", 6, "<") == b"\x01" * 6
+        assert _unpack_bits(b"\xff", 7, "<") == b"\x01" * 7
+        assert _unpack_bits(b"\xff", 8, "<") == b"\x01" * 8
+        assert _unpack_bits(b"\xff\xAA", 9, "<") == b"\x01" * 8 + b"\x00"
+        assert _unpack_bits(b"\xff\xAA", 10, "<") == b"\x01" * 8 + b"\x00\x01"
+        assert _unpack_bits(b"\xff\xAA", 11, "<") == b"\x01" * 8 + b"\x00\x01\x00"
+        assert _unpack_bits(b"\xff\xAA", 12, "<") == b"\x01" * 8 + b"\x00\x01" * 2
+        assert _unpack_bits(b"\xff\xAA", 13, "<") == b"\x01" * 8 + b"\x00\x01" * 2 + b"\x00"
+        assert _unpack_bits(b"\xff\xAA", 14, "<") == b"\x01" * 8 + b"\x00\x01" * 3
+        assert _unpack_bits(b"\xff\xAA", 15, "<") == b"\x01" * 8 + b"\x00\x01" * 3 + b"\x00"
+        assert _unpack_bits(b"\xff\xAA", 16, "<") == b"\x01" * 8 + b"\x00\x01" * 4
+
+    def test_count_little_util(self):
+        """Test the `count` parameter for little endian unpacking."""
+        assert unpack_bits(b"\x00", -1, "<") == b"\x00" * 8
+        assert unpack_bits(b"\x00", 10, "<") == b"\x00" * 8
+        assert unpack_bits(b"\x00", None, "<") == b"\x00" * 8
+        assert unpack_bits(b"\x00", 0, "<") == b"\x00" * 8
+
         assert unpack_bits(b"\x00", 1, "<") == b"\x00"
         assert unpack_bits(b"\xff", 1, "<") == b"\x01"
         assert unpack_bits(b"\xff", 2, "<") == b"\x01" * 2
@@ -417,31 +454,31 @@ def test_count_little(self):
 
     def test_count_big(self):
         """Test the `count` parameter for big endian unpacking."""
-        assert unpack_bits(b"\x00", 1, ">") == b"\x00"
-        assert unpack_bits(b"\xff", 1, ">") == b"\x01"
-        assert unpack_bits(b"\xff", 2, ">") == b"\x01" * 2
-        assert unpack_bits(b"\xff", 3, ">") == b"\x01" * 3
-        assert unpack_bits(b"\xff", 4, ">") == b"\x01" * 4
-        assert unpack_bits(b"\xff", 5, ">") == b"\x01" * 5
-        assert unpack_bits(b"\xff", 6, ">") == b"\x01" * 6
-        assert unpack_bits(b"\xff", 7, ">") == b"\x01" * 7
-        assert unpack_bits(b"\xff", 8, ">") == b"\x01" * 8
-        assert unpack_bits(b"\xff\xAA", 9, ">") == b"\x01" * 8 + b"\x01"
-        assert unpack_bits(b"\xff\xAA", 10, ">") == b"\x01" * 8 + b"\x01\x00"
-        assert unpack_bits(b"\xff\xAA", 11, ">") == b"\x01" * 8 + b"\x01\x00\x01"
-        assert unpack_bits(b"\xff\xAA", 12, ">") == b"\x01" * 8 + b"\x01\x00" * 2
-        assert unpack_bits(b"\xff\xAA", 13, ">") == b"\x01" * 8 + b"\x01\x00" * 2 + b"\x01"
-        assert unpack_bits(b"\xff\xAA", 14, ">") == b"\x01" * 8 + b"\x01\x00" * 3
-        assert unpack_bits(b"\xff\xAA", 15, ">") == b"\x01" * 8 + b"\x01\x00" * 3 + b"\x01"
-        assert unpack_bits(b"\xff\xAA", 16, ">") == b"\x01" * 8 + b"\x01\x00" * 4
+        assert _unpack_bits(b"\x00", 1, ">") == b"\x00"
+        assert _unpack_bits(b"\xff", 1, ">") == b"\x01"
+        assert _unpack_bits(b"\xff", 2, ">") == b"\x01" * 2
+        assert _unpack_bits(b"\xff", 3, ">") == b"\x01" * 3
+        assert _unpack_bits(b"\xff", 4, ">") == b"\x01" * 4
+        assert _unpack_bits(b"\xff", 5, ">") == b"\x01" * 5
+        assert _unpack_bits(b"\xff", 6, ">") == b"\x01" * 6
+        assert _unpack_bits(b"\xff", 7, ">") == b"\x01" * 7
+        assert _unpack_bits(b"\xff", 8, ">") == b"\x01" * 8
+        assert _unpack_bits(b"\xff\xAA", 9, ">") == b"\x01" * 8 + b"\x01"
+        assert _unpack_bits(b"\xff\xAA", 10, ">") == b"\x01" * 8 + b"\x01\x00"
+        assert _unpack_bits(b"\xff\xAA", 11, ">") == b"\x01" * 8 + b"\x01\x00\x01"
+        assert _unpack_bits(b"\xff\xAA", 12, ">") == b"\x01" * 8 + b"\x01\x00" * 2
+        assert _unpack_bits(b"\xff\xAA", 13, ">") == b"\x01" * 8 + b"\x01\x00" * 2 + b"\x01"
+        assert _unpack_bits(b"\xff\xAA", 14, ">") == b"\x01" * 8 + b"\x01\x00" * 3
+        assert _unpack_bits(b"\xff\xAA", 15, ">") == b"\x01" * 8 + b"\x01\x00" * 3 + b"\x01"
+        assert _unpack_bits(b"\xff\xAA", 16, ">") == b"\x01" * 8 + b"\x01\x00" * 4
 
     @pytest.mark.parametrize("src, little, big", REFERENCE_PACK_UNPACK)
     def test_unpack_bytearray(self, src, little, big):
         """Test unpacking data without numpy."""
         as_bytes = pack(f"{len(little)}B", *little)
-        assert unpack_bits(bytearray(src), 0, "<") == as_bytes
+        assert _unpack_bits(bytearray(src), 0, "<") == as_bytes
         as_bytes = pack(f"{len(big)}B", *big)
-        assert unpack_bits(bytearray(src), 0, ">") == as_bytes
+        assert _unpack_bits(bytearray(src), 0, ">") == as_bytes
 
 
 REFERENCE_PACK_PARTIAL_LITTLE = [
@@ -489,6 +526,12 @@ class TestPackBits:
 
     @pytest.mark.parametrize("output, little, big", REFERENCE_PACK_UNPACK)
     def test_pack_bytes(self, output, little, big):
+        """Test packing data."""
+        assert output == _pack_bits(bytes(little), "<")
+        assert output == _pack_bits(bytes(big), ">")
+
+    @pytest.mark.parametrize("output, little, big", REFERENCE_PACK_UNPACK)
+    def test_pack_bytes_utils(self, output, little, big):
         """Test packing data."""
         assert output == pack_bits(bytes(little), "<")
         assert output == pack_bits(bytes(big), ">")
@@ -496,14 +539,14 @@ def test_pack_bytes(self, output, little, big):
     @pytest.mark.parametrize("output, little, big", REFERENCE_PACK_UNPACK)
     def test_pack_bytearray(self, output, little, big):
         """Test packing data."""
-        assert output == pack_bits(bytearray(little), "<")
-        assert output == pack_bits(bytearray(big), ">")
+        assert output == _pack_bits(bytearray(little), "<")
+        assert output == _pack_bits(bytearray(big), ">")
 
     def test_non_binary_input(self):
         """Test non-binary input raises exception."""
         msg = r"Only binary input \(containing zeros or ones\) can be packed"
         with pytest.raises(ValueError, match=msg):
-            pack_bits(b"\x00\x00\x02\x00\x00\x00\x00\x00", "<")
+            _pack_bits(b"\x00\x00\x02\x00\x00\x00\x00\x00", "<")
 
     def test_bytes_input(self):
         """Repeat above test with bytes input."""
@@ -516,8 +559,8 @@ def test_bytes_input(self):
             ]
         )
         # fmt: on
-        assert b"\x00\x55\xff" == pack_bits(src, "<")
-        assert b"\x00\xAA\xff" == pack_bits(src, ">")
+        assert b"\x00\x55\xff" == _pack_bits(src, "<")
+        assert b"\x00\xAA\xff" == _pack_bits(src, ">")
 
     def test_bytearry_input(self):
         """Repeat above test with bytearray input."""
@@ -530,25 +573,25 @@ def test_bytearry_input(self):
             ]
         )
         # fmt: on
-        assert b"\x00\x55\xff" == pack_bits(src, "<")
-        assert b"\x00\xAA\xff" == pack_bits(src, ">")
+        assert b"\x00\x55\xff" == _pack_bits(src, "<")
+        assert b"\x00\xAA\xff" == _pack_bits(src, ">")
 
     @pytest.mark.parametrize("output, src", REFERENCE_PACK_PARTIAL_LITTLE)
     def test_pack_partial_bytes(self, src, output):
         """Test packing data that isn't a full byte long."""
-        assert output == pack_bits(bytes(src), "<")
+        assert output == _pack_bits(bytes(src), "<")
 
     @pytest.mark.parametrize("output, src", REFERENCE_PACK_PARTIAL_LITTLE)
     def test_pack_partial_bytearray(self, src, output):
         """Test packing data that isn't a full byte long."""
-        assert output == pack_bits(bytearray(src), "<")
+        assert output == _pack_bits(bytearray(src), "<")
 
     @pytest.mark.parametrize("output, src", REFERENCE_PACK_PARTIAL_BIG)
     def test_pack_partial_bytes_big(self, src, output):
         """Test packing data that isn't a full byte long."""
-        assert output == pack_bits(bytes(src), ">")
+        assert output == _pack_bits(bytes(src), ">")
 
     @pytest.mark.parametrize("output, src", REFERENCE_PACK_PARTIAL_BIG)
     def test_pack_partial_bytearray_big(self, src, output):
         """Test packing data that isn't a full byte long."""
-        assert output == pack_bits(bytearray(src), ">")
+        assert output == _pack_bits(bytearray(src), ">")
diff --git a/rle/utils.py b/rle/utils.py
index d15c277..439a828 100644
--- a/rle/utils.py
+++ b/rle/utils.py
@@ -429,7 +429,7 @@ def pack_bits(src: bytes | bytearray, bitorder: str = "<") -> bytearray:
 
 
 def unpack_bits(
-    src: bytes | bytearray, count: int | None = None, bitorder: str = "<"
+        src: bytes | bytearray, count: int | None = None, bitorder: str = "<"
     ) -> bytearray:
     """Bit unpack `src`
 

From 4beba4480e515d66aeb4cd34ca6e89409523f8ae Mon Sep 17 00:00:00 2001
From: scaramallion <scaramallion@users.noreply.github.com>
Date: Mon, 8 Sep 2025 09:06:34 +1000
Subject: [PATCH 9/9] Bump version to release

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index ab085eb..00a049c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,7 +33,7 @@ keywords = ["dicom pydicom python rle pylibjpeg rust"]
 license = "MIT"
 name = "pylibjpeg-rle"
 readme = "README.md"
-version = "2.2.0.dev0"
+version = "2.2.0"
 requires-python = ">=3.10"
 dependencies = [
   "numpy>=2.0",