From 9d644aa18ccd1050d677488c43826406be6aa00b Mon Sep 17 00:00:00 2001 From: scaramallion Date: Tue, 6 May 2025 07:52:55 +1000 Subject: [PATCH 1/9] WIP --- Cargo.lock | 40 ++++++++ Cargo.toml | 1 + README.md | 46 ++++----- pyproject.toml | 2 +- rle/utils.py | 18 +++- src/lib.rs | 253 +++++++++++++++++++++++++++++++++++++++---------- 6 files changed, 282 insertions(+), 78 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 77d35d4..fcc00f6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,12 +8,30 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + [[package]] name = "heck" version = "0.5.0" @@ -66,6 +84,7 @@ dependencies = [ name = "pylibjpeg-rle" version = "1.1.0" dependencies = [ + "bitvec", "pyo3", ] @@ -141,6 +160,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + [[package]] name = "syn" version = "2.0.101" @@ -152,6 +177,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + [[package]] name = "target-lexicon" version = "0.13.2" @@ -169,3 +200,12 @@ name = "unindent" version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" + +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] diff --git a/Cargo.toml b/Cargo.toml index a78c8e7..bec581f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,3 +12,4 @@ crate-type = ["cdylib"] [dependencies] pyo3 = { version = "0.24.2", features = ["extension-module"] } +bitvec = { version = "1.0.1" } diff --git a/README.md b/README.md index 6f0f08a..4782f7b 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ ## pylibjpeg-rle -A fast DICOM ([PackBits](https://en.wikipedia.org/wiki/PackBits)) RLE plugin for [pylibjpeg](https://github.com/pydicom/pylibjpeg), written in Rust with a Python 3.7+ wrapper. +A fast DICOM ([PackBits](https://en.wikipedia.org/wiki/PackBits)) RLE plugin for [pylibjpeg](https://github.com/pydicom/pylibjpeg), written in Rust with a Python wrapper. Linux, MacOS and Windows are all supported. @@ -85,32 +85,32 @@ ds.save_as('as_rle.dcm') ### Benchmarks #### Decoding -Time per 1000 decodes, pydicom's default RLE handler vs. pylibjpeg-rle +Time per 1000 decodes, pydicom's default RLE decoder vs. pylibjpeg-rle: | Dataset | Pixels | Bytes | pydicom | pylibjpeg-rle | | --- | --- | --- | --- | --- | -| OBXXXX1A_rle.dcm | 480,000 | 480,000 | 4.89 s | 0.79 s | -| OBXXXX1A_rle_2frame.dcm | 960,000 | 960,000 | 9.89 s | 1.65 s | -| SC_rgb_rle.dcm | 10,000 | 30,000 | 0.20 s | 0.15 s | -| SC_rgb_rle_2frame.dcm | 20,000 | 60,000 | 0.32 s | 0.18 s | -| MR_small_RLE.dcm | 4,096 | 8,192 | 0.35 s | 0.13 s | -| emri_small_RLE.dcm | 40,960 | 81,920 | 1.13 s | 0.28 s | -| SC_rgb_rle_16bit.dcm | 10,000 | 60,000 | 0.33 s | 0.17 s | -| SC_rgb_rle_16bit_2frame.dcm | 20,000 | 120,000 | 0.56 s | 0.21 s | -| rtdose_rle_1frame.dcm | 100 | 400 | 0.12 s | 0.13 s | -| rtdose_rle.dcm | 1,500 | 6,000 | 0.53 s | 0.26 s | -| SC_rgb_rle_32bit.dcm | 10,000 | 120,000 | 0.56 s | 0.19 s | -| SC_rgb_rle_32bit_2frame.dcm | 20,000 | 240,000 | 1.03 s | 0.28 s | +| OBXXXX1A_rle.dcm | 480,000 | 480,000 | 5.7 s | 1.1 s | +| OBXXXX1A_rle_2frame.dcm | 960,000 | 960,000 | 11.5 s | 2.1 s | +| SC_rgb_rle.dcm | 10,000 | 30,000 | 0.28 s | 0.19 s | +| SC_rgb_rle_2frame.dcm | 20,000 | 60,000 | 0.45 s | 0.28 s | +| MR_small_RLE.dcm | 4,096 | 8,192 | 0.46 s | 0.15 s | +| emri_small_RLE.dcm | 40,960 | 81,920 | 1.8 s | 0.67 s | +| SC_rgb_rle_16bit.dcm | 10,000 | 60,000 | 0.48 s | 0.25 s | +| SC_rgb_rle_16bit_2frame.dcm | 20,000 | 120,000 | 0.86 s | 0.39 s | +| rtdose_rle_1frame.dcm | 100 | 400 | 0.16 s | 0.13 s | +| rtdose_rle.dcm | 1,500 | 6,000 | 1.0 s | 0.64 s | +| SC_rgb_rle_32bit.dcm | 10,000 | 120,000 | 0.82 s | 0.35 s | +| SC_rgb_rle_32bit_2frame.dcm | 20,000 | 240,000 | 1.5 s | 0.60 s | #### Encoding -Time per 1000 encodes, pydicom's default RLE handler vs. pylibjpeg-rle +Time per 1000 encodes, pydicom's default RLE encoder vs. pylibjpeg-rle and [python-gdcm](https://github.com/tfmoraes/python-gdcm): -| Dataset | Pixels | Bytes | pydicom | pylibjpeg-rle | -| --- | --- | --- | --- | --- | -| OBXXXX1A.dcm | 480,000 | 480,000 | 30.7 s | 1.36 s | -| SC_rgb.dcm | 10,000 | 30,000 | 1.80 s | 0.09 s | -| MR_small.dcm | 4,096 | 8,192 | 2.29 s | 0.04 s | -| SC_rgb_16bit.dcm | 10,000 | 60,000 | 3.57 s | 0.17 s | -| rtdose_1frame.dcm | 100 | 400 | 0.19 s | 0.003 s | -| SC_rgb_32bit.dcm | 10,000 | 120,000 | 7.20 s | 0.33 s | +| Dataset | Pixels | Bytes | pydicom | pylibjpeg-rle | python-gdcm | +| --- | --- | --- | --- | --- | --- | +| OBXXXX1A.dcm | 480,000 | 480,000 | 30.6 s | 1.4 s | 1.5 s | +| SC_rgb.dcm | 10,000 | 30,000 | 1.9 s | 0.11 s | 0.21 s | +| MR_small.dcm | 4,096 | 8,192 | 3.0 s | 0.11 s | 0.29 s | +| SC_rgb_16bit.dcm | 10,000 | 60,000 | 3.6 s | 0.18 s | 0.28 s | +| rtdose_1frame.dcm | 100 | 400 | 0.28 s | 0.04 s | 0.14 s | +| SC_rgb_32bit.dcm | 10,000 | 120,000 | 7.1 s | 0.32 s | 0.43 s | diff --git a/pyproject.toml b/pyproject.toml index 859c0af..e79a7b6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ keywords = ["dicom pydicom python rle pylibjpeg rust"] license = "MIT" name = "pylibjpeg-rle" readme = "README.md" -version = "2.1.0" +version = "2.2.0.dev0" requires-python = ">=3.10" dependencies = [ "numpy>=2.0", diff --git a/rle/utils.py b/rle/utils.py index 63c78ef..c9e491f 100644 --- a/rle/utils.py +++ b/rle/utils.py @@ -106,6 +106,10 @@ def encode_array( .. versionadded:: 1.1 + .. versionchanged:: 2.2 + + Added support for 'bits_allocated' 1. + Parameters ---------- arr : numpy.ndarray @@ -124,7 +128,7 @@ def encode_array( * ``samples_per_px': int`` the number of samples per pixel, either 1 for monochrome or 3 for RGB or similar data. * ``'bits_per_px': int`` the number of bits needed to contain each - pixel, either 8, 16, 32 or 64. + pixel, either 1, 8, 16, 32 or 64. * ``'nr_frames': int`` the number of frames in `arr`, required if more than one frame is present. @@ -169,6 +173,10 @@ def encode_pixel_data( to 15 in order to meet the requirements of the *RLE Lossless* transfer syntax. + .. versionchanged:: 2.2 + + Added support for 'bits_allocated' 1. + Parameters ---------- src : bytes @@ -189,7 +197,7 @@ def encode_pixel_data( * ``samples_per_pixel': int`` the number of samples per pixel, either 1 for monochrome or 3 for RGB or similar data. * ``'bits_allocated': int`` the number of bits needed to contain each - pixel, either 8, 16, 32 or 64. + pixel, either 1, 8, 16, 32 or 64. Returns ------- @@ -212,9 +220,9 @@ def encode_pixel_data( msg = "(0028,0002) 'Samples per Pixel'" if ds else "'samples_per_pixel'" raise ValueError(f"{msg} must be 1 or 3") - if bpp not in [8, 16, 32, 64]: + if bpp not in [1, 8, 16, 32, 64]: msg = "(0028,0100) 'Bits Allocated'" if ds else "'bits_allocated'" - raise ValueError(f"{msg} must be 8, 16, 32 or 64") + raise ValueError(f"{msg} must be 1, 8, 16, 32 or 64") if bpp / 8 * spp > 15: raise ValueError( @@ -235,6 +243,7 @@ def encode_pixel_data( return cast(bytes, encode_frame(src, r, c, spp, bpp, byteorder)) +# TODO: unpack Bits Stored 1 def generate_frames(ds: "Dataset", reshape: bool = True) -> Iterator[np.ndarray]: """Yield a *Pixel Data* frame from `ds` as an :class:`~numpy.ndarray`. @@ -310,6 +319,7 @@ def generate_frames(ds: "Dataset", reshape: bool = True) -> Iterator[np.ndarray] yield arr.transpose(1, 2, 0) +# TODO: unpack Bits Stored 1 def pixel_array(ds: "Dataset") -> "np.ndarray": """Return the entire *Pixel Data* as an :class:`~numpy.ndarray`. diff --git a/src/lib.rs b/src/lib.rs index ecd05d6..b5f5c74 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -94,7 +94,7 @@ fn decode_frame<'py>( The total number of pixels in the frame (rows x columns), maximum (2^32 - 1). bpp : int - The number of bits per pixel, supported values 8, 16, 32, 64. + The number of bits per pixel, supported values 1, 8, 16, 32, 64. byteorder : str The byte order of the returned data, '<' for little endian, '>' for big endian. @@ -123,7 +123,7 @@ fn _decode_frame( nr_pixels The total number of pixels in the frame (rows x columns). bpp - The number of bits per pixel, should be a multiple of 8 and no larger + The number of bits per pixel, should be 1 or a multiple of 8 and no larger than 64. byteorder The byte order of the decoded data, '<' for little endian, '>' for @@ -133,7 +133,7 @@ fn _decode_frame( // Pre-define our errors for neatness let err_invalid_bits_allocated = Err( String::from( - "The (0028,0100) 'Bits Allocated' value must be 8, 16, 32 or 64" + "The (0028,0100) 'Bits Allocated' value must be 1, 8, 16, 32 or 64" ).into() ); let err_invalid_offset = Err( @@ -154,24 +154,31 @@ fn _decode_frame( String::from("'byteorder' must be '>' or '<'").into() ); - // Ensure we have a valid bits/px value + let mut is_bit_packed: bool = false; + + // Ensure we have a valid bits/px value: 1 or a multiple of 8 match bpp { 0 => return err_invalid_bits_allocated, + 1 => { is_bit_packed = true; }, _ => match bpp % 8 { - 0 => {}, + 0 => { }, _ => return err_invalid_bits_allocated } } - // Ensure `bytes_per_pixel` is in [1, 2, 4, 8] - let bytes_per_pixel: u8 = bpp / 8; - match bytes_per_pixel { - 1 => {}, - 2 | 4 | 8 => match byteorder { - '>' | '<' => {}, - _ => return err_invalid_byteorder - }, - _ => return err_invalid_bits_allocated + // For non-bit-packed data ensure `bytes_per_pixel` is in [1, 2, 4, 8] + if !is_bit_packed { + let bytes_per_pixel: u8 = bpp / 8; + match bytes_per_pixel { + 1 => {}, + 2 | 4 | 8 => match byteorder { + '>' | '<' => {}, + _ => return err_invalid_byteorder + }, + _ => return err_invalid_bits_allocated + } + } else { + } // Parse the RLE header and check results @@ -213,14 +220,6 @@ fn _decode_frame( _ => return err_invalid_nr_samples } - // Watch for overflow here; u32 * u32 -> u64 - let expected_length = usize::try_from( - nr_pixels * u32::from(bytes_per_pixel * spp) - ).unwrap(); - - // Pre-allocate a vector for the decoded frame - let mut frame = vec![0u8; expected_length]; - /* Example ------- @@ -238,47 +237,201 @@ fn _decode_frame( MSB LSB MSB LSB ... MSB LSB | MSB LSB MSB LSB ... MSB LSB | ... */ + /* + Bit-packed data + --------------- + + For bit-packed data (i.e. a *Bits Allocated* of 1), we decode the RLE encoding but + leave the bit-packing in place. This means the length of each decoded segment is not + equal to the number of pixels but instead the number of pixels / 8, rounded up to the + nearest whole integer with the difference being accounted for by padding 0b0 bits. + + Each decoded segment must have these padding bits removed and the final frame should be + the concatenated unpadded segments. Fortunately for bit-packed data there can be at + most 3 segments (with a *Samples per Pixel* of 3). + + Might be a good idea to use bool arrays instead of u8 and implicitly unpack to avoid + annoyances with multi-segment padding, then convert to u8 at the end. + + bitvec perhaps? + */ + // Decode each segment and place it into the vector // ------------------------------------------------ let pps = usize::try_from(nr_pixels).unwrap(); // Concatenate sample planes into a frame - for sample in 0..spp { // 0 or (0, 1, 2) - // Sample offset - let so = usize::from(sample * bytes_per_pixel) * pps; - - // Interleave the segments into a sample plane - for byte_offset in 0..bytes_per_pixel { // 0, [1, 2, 3, ..., 7] - // idx should be in range [0, 15] - let idx: usize; - if byteorder == '>' { // big-endian - // e.g. 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 - idx = usize::from(sample * bytes_per_pixel + byte_offset); - } else { // little-endian - // e.g. 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8 - idx = usize::from( - bytes_per_pixel - byte_offset + bytes_per_pixel * sample - ) - 1; + if !is_bit_packed { + // Watch for overflow here; u32 * u32 -> u64 + // Actual values are (u16 * u16) * u8 + let expected_length = usize::try_from( + nr_pixels * u32::from(bytes_per_pixel * spp) + ).unwrap(); + + // Pre-allocate a vector for the decoded frame + let mut frame = vec![0u8; expected_length]; + + for sample in 0..spp { // 0 or (0, 1, 2) + // Sample offset + let so = usize::from(sample * bytes_per_pixel) * pps; + + // Interleave the segments into a sample plane + for byte_offset in 0..bytes_per_pixel { // 0, [1, 2, 3, ..., 7] + // idx should be in range [0, 15] + let idx: usize; + if byteorder == '>' { // big-endian + // e.g. 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 + idx = usize::from(sample * bytes_per_pixel + byte_offset); + } else { // little-endian + // e.g. 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8 + idx = usize::from( + bytes_per_pixel - byte_offset + bytes_per_pixel * sample + ) - 1; + } + + // offsets[idx] is u32 -> usize not guaranteed + let start = usize::try_from(offsets[idx]).unwrap(); + let end = usize::try_from(offsets[idx + 1]).unwrap(); + + // Decode the segment into the frame + let len = _decode_segment_into_frame( + <&[u8]>::try_from(&src[start..end]).unwrap(), + &mut frame, + usize::from(bytes_per_pixel), + usize::from(byte_offset) + so + )?; + if len != pps { return err_segment_length } } + } + + return Ok(frame); + + // Bit-packed data + // u8 * u32 + // FIXME + let bits_per_segment = usize::try_from(8 * nr_pixels).unwrap(); // wrong! + // Each segment is padded to an even length, so segment boundaries should be + // whole integers + + // The number of whole bytes per segment, including any padding + let mut padding_bytes = 0; + match bits_per_segment % 16 { + 0 => {}, + 1..=8 => { padding_bytes = 2 }, + 9..=15 => { padding_bytes = 1 }, + } + let bytes_per_segment = bits_per_segment / 8 + padding_bytes; - // offsets[idx] is u32 -> usize not guaranteed - let start = usize::try_from(offsets[idx]).unwrap(); - let end = usize::try_from(offsets[idx + 1]).unwrap(); - - // Decode the segment into the frame - let len = _decode_segment_into_frame( - <&[u8]>::try_from(&src[start..end]).unwrap(), - &mut frame, - usize::from(bytes_per_pixel), - usize::from(byte_offset) + so - )?; - if len != pps { return err_segment_length } + let mut frame: Vec = Vec::new(); + for sample in 0..spp { // 0..0 or 0..2 + // Start and end indices of the segment + let start = usize::try_from(offsets[idx]).unwrap(); + let end = usize::try_from(offsets[idx + 1]).unwrap(); + + // Decode the segment into the frame + let segment = _decode_segment_into_frame( + <&[u8]>::try_from(&src[start..end]).unwrap(), + bytes_per_segment, + )?; + if segment.len() != bytes_per_segment { return err_segment_length } + + if spp == 0 { return Ok(segment) } + + if !is_padded { + frame.extend(segment); + } else { + // hmm } + } + // Concatenate the segments together, bit shifting to ensure any padding is removed + Ok(frame) } +/* +fn _shift_bit_packed_segment( + prefix: u8, src: Vec, shift: u8 +) -> Vec { + assert!(0 < shift < 8); + + let mut dst = Vec::new(); + // shift the prefix to the right by the required shift + dst.push(prefix >> (8 - shift)) + + for idx in 0..src.len() { + dst[idx] = src[idx] << shift; + dst[idx] |= src[idx + 1] >> (8 - shift); + } + + dst +} +*/ + + +fn _decode_bit_packed_segment( + src: &[u8], segment_length: usize +) -> Result, Box> { + // + let eod = src.len() - 1; + let mut pos = 0; + let mut header_byte: usize; + let mut dst: Vec = Vec::new(); + + let err_eod = Err( + String::from( + "The end of the data was reached before the segment was \ + completely decoded" + ).into() + ); + + loop { + // `header_byte` is equivalent to N in the DICOM Standard + // usize is at least u8 + header_byte = usize::from(src[pos]); + pos += 1; + if header_byte > 128 { + // Extend by copying the next byte (-N + 1) times + // however since using uint8 instead of int8 this will be + // (256 - N + 1) times + op_len = 257 - header_byte; + + // Check we have enough encoded data + if pos > eod { return err_eod } + + // Check segment for excess padding + if (idx + op_len) > segment_length { + dst.extend([src[pos]; segment_length - idx]); + + return Ok(dst) + } + + dst.extend([src[pos]; op_len]); + pos += 1; + } else if header_byte < 128 { + // Extend by literally copying the next (N + 1) bytes + op_len = header_byte + 1; + + // Check we have enough encoded data + if (pos + header_byte) > eod { return err_eod } + + // Check segment for excess padding + if (idx + op_len) > segment_length { + dst.extend(src[pos..pos + segment_length - idx]); + + return Ok(dst) + } + + dst.extend(src[pos..pos + op_len]); + pos += header_byte + 1; + } // header_byte == 128 is noop + + if pos >= eod { return Ok(dst) } + } +} + + fn _decode_segment_into_frame( src: &[u8], dst: &mut Vec, bpp: usize, initial_offset: usize ) -> Result> { From a9bb303cfcb1369c38d23c18cc70682f78ff4c3d Mon Sep 17 00:00:00 2001 From: scaramallion Date: Fri, 9 May 2025 15:55:37 +1000 Subject: [PATCH 2/9] WIP2 --- rle/tests/test_decode.py | 116 +++++++++++++++++++++++++- rle/tests/test_utils.py | 2 +- src/lib.rs | 170 +++++++++++++++++++++------------------ 3 files changed, 203 insertions(+), 85 deletions(-) diff --git a/rle/tests/test_decode.py b/rle/tests/test_decode.py index 36df11e..13247dd 100644 --- a/rle/tests/test_decode.py +++ b/rle/tests/test_decode.py @@ -69,19 +69,19 @@ def as_bytes(self, offsets): def test_bits_allocated_zero_raises(self): """Test exception raised for BitsAllocated 0.""" - msg = r"The \(0028,0100\) 'Bits Allocated' value must be 8, 16, 32 or 64" + msg = r"The \(0028,0100\) 'Bits Allocated' value must be 1, 8, 16, 32 or 64" with pytest.raises(ValueError, match=msg): decode_frame(b"\x00\x00\x00\x00", 1, 0, "<") def test_bits_allocated_not_octal_raises(self): - """Test exception raised for BitsAllocated not a multiple of 8.""" - msg = r"The \(0028,0100\) 'Bits Allocated' value must be 8, 16, 32 or 64" + """Test exception raised for BitsAllocated not 1 or a multiple of 8.""" + msg = r"The \(0028,0100\) 'Bits Allocated' value must be 1, 8, 16, 32 or 64" with pytest.raises(ValueError, match=msg): decode_frame(b"\x00\x00\x00\x00", 1, 12, "<") def test_bits_allocated_large_raises(self): """Test exception raised for BitsAllocated greater than 64.""" - msg = r"The \(0028,0100\) 'Bits Allocated' value must be 8, 16, 32 or 64" + msg = r"The \(0028,0100\) 'Bits Allocated' value must be 1, 8, 16, 32 or 64" with pytest.raises(ValueError, match=msg): decode_frame(b"\x00\x00\x00\x00", 1, 72, "<") @@ -357,6 +357,114 @@ def test_u32_3s(self): assert [4294967295, 16777216, 65536, 256, 1, 0] == arr[6:12].tolist() assert [1, 16777216, 65536, 256, 1, 4294967294] == arr[12:].tolist() + def test_u8_1s_bs1(self): + """Test decoding bit packed 1 sample/px.""" + header = b"\x01\x00\x00\x00\x40\x00\x00\x00" + header += (64 - len(header)) * b"\x00" + # 0, 64, 128, 160, 192, 255 + data = b"\x05\x00\x40\x80\xA0\xC0\xFF" + # Big endian + # 48 px - byte aligned in 6 bytes + decoded = decode_frame(header + data, 6 * 8, 1, ">") + arr = np.frombuffer(decoded, np.dtype("uint8")) + assert [0, 64, 128, 160, 192, 255] == arr.tolist() + + # 37 px -> 5 bytes + decoded = decode_frame(header + data, 4 * 8 + 5, 1, ">") + arr = np.frombuffer(decoded, np.dtype("uint8")) + assert [0, 64, 128, 160, 192] == arr.tolist() + + # 2 px -> 1 byte + decoded = decode_frame(header + data, 2, 1, ">") + arr = np.frombuffer(decoded, np.dtype("uint8")) + assert [0] == arr.tolist() + + # Little-endian + decoded = decode_frame(header + data, 6 * 8, 1, "<") + arr = np.frombuffer(decoded, np.dtype("uint8")) + assert [0, 64, 128, 160, 192, 255] == arr.tolist() + + decoded = decode_frame(header + data, 4 * 8 + 5, 1, "<") + arr = np.frombuffer(decoded, np.dtype("uint8")) + assert [0, 64, 128, 160, 192] == arr.tolist() + + decoded = decode_frame(header + data, 2, 1, "<") + arr = np.frombuffer(decoded, np.dtype("uint8")) + assert [0] == arr.tolist() + + def test_u8_3s_bs1(self): + """Test decoding bit packed 3 sample/px.""" + header = ( + b"\x03\x00\x00\x00" # 3 segments + b"\x40\x00\x00\x00" # 64 + b"\x47\x00\x00\x00" # 71 + b"\x4E\x00\x00\x00" # 78 + ) + header += (64 - len(header)) * b"\x00" + # 0, 64, 128, 160, 192, 255 + data = ( + b"\x05\x00\x40\x80\xA0\xC0\xFF" # R + b"\x05\x7F\xC0\x80\x40\x00\xFF" # B + b"\x05\x01\x40\x80\xA0\xC0\xFE" # G + ) + # 48 px - byte aligned in 18 bytes + decoded = decode_frame(header + data, 6 * 8, 1, "<") + arr = np.frombuffer(decoded, np.dtype("uint8")) + # Ordered all R, all G, all B + assert [0, 64, 128, 160, 192, 255] == arr[:6].tolist() + assert [127, 192, 128, 64, 0, 255] == arr[6:12].tolist() + assert [1, 64, 128, 160, 192, 254] == arr[12:].tolist() + + # 47 px - non-byte aligned in 18 bytes + decoded = decode_frame(header + data, 6 * 8 - 1, 1, "<") + arr = np.frombuffer(decoded, np.dtype("uint8")) + # Boundaries are 47 | 94 | 141 + # v removed + # 255 | 127: 0b[1111_111x 0b0]111_1111 -> 0b1111_1110 + assert [0, 64, 128, 160, 192, 254] == arr[:6].tolist() + # Left shift values by 1 bit + # 127 | 192: 0b0[111_1111 0b1]100_0000 -> 0b1111_1111 + # 192 | 128: 0b1[100_0000 0b1]000_0000 -> 0b1000_0001 + # 128 | 64: 0b1[000_0000 0b0]100_0000 -> 0b0000_0000 + # 64 | 0: 0b0[100_0000 0b0]000_0000 -> 0b1000_0000 + # 0 | 255: 0b0[000_0000 0b1]111_1111 -> 0b0000_0001 + # v removed + # 255 | 1: 0b1[111_111x 0b00]00_0001 -> 0b1111_1100 + assert [255, 129, 0, 128, 1, 252] == arr[6:12].tolist() + # Left shift values by 2 bits + # 1 | 64: 0b00[00_0001 0b01]00_0000 -> 0b0000_0101 + # 64 | 128: 0b01[00_0000 0b10]00_0000 -> 0b0000_0010 + # 128 | 160: 0b10[00_0000 0b10]10_0000 -> 0b0000_0010 + # 160 | 192: 0b10[10_0000 0b11]00_0000 -> 0b1000_0011 + # 192 | 254: 0b11[00_0000 0b11]11_1110 -> 0b0000_0011 + # v removed + # 254 | x: 0b11[11_111x -> 0b1111_1000 + assert [5, 2, 2, 131, 3, 248] == arr[12:].tolist() + + # 41 px - non-byte aligned in 16 bytes + decoded = decode_frame(header + data, 5 * 8 + 1, 1, "<") + arr = np.frombuffer(decoded, np.dtype("uint8")) + # Boundaries are 48 | 96 | 144 -> 41 | 82 | 123 + # vvv vvvv removed + # 255 | 255: 0b[1xxx_xxxx 0b0111_111]1 -> 0b1011_1111 + assert [0, 64, 128, 160, 192, 191] == arr[:6].tolist() + # Left shift values by 7 bits + # 255 | 192: 0b1111_111[1 0b1100_000]0 -> 0b1110_0000 + # 192 | 128: 0b1100_000[0 0b1000_000]0 -> 0b0100_0000 + # 128 | 64: 0b1000_000[0 0b0100_000]0 -> 0b0010_0000 + # 64 | 0: 0b0100_000[0 0b0000_000]0 -> 0b0000_0000 + # vvv vvvv removed + # 0 | 255 | 1: 0b0000_000[0 0b1xxx_xxxx 0b0000_00]01 -> 0b0100_0000 + assert [224, 64, 32, 0, 64] == arr[6:11].tolist() + # Left shift values by 14 bits + # 1 | 64: 0b0000_00[01 0b0100_00]00 -> 0b0101_0000 + # 64 | 128: 0b0100_00[00 0b1000_00]00 -> 0b0010_0000 + # 128 | 160: 0b1000_00[00 0b1010_00]00 -> 0b0010_1000 + # 160 | 192: 0b1010_00[00 0b1100_00]00 -> 0b0011_0000 + # vvv vvvv removed + # 192 | 254: 0b1100_[00 0b1]xxx_xxxx -> 0b0010_0000 + assert [80, 32, 40, 48, 32] == arr[11:].tolist() + @pytest.mark.skipif(not HAVE_PYDICOM, reason="No pydicom") class TestDecodeFrame_Datasets: diff --git a/rle/tests/test_utils.py b/rle/tests/test_utils.py index 5371d88..87c66ee 100644 --- a/rle/tests/test_utils.py +++ b/rle/tests/test_utils.py @@ -199,7 +199,7 @@ def test_bad_bits_allocated_raises(self): "byteorder": "<", } - msg = r"'bits_allocated' must be 8, 16, 32 or 64" + msg = r"'bits_allocated' must be 1, 8, 16, 32 or 64" with pytest.raises(ValueError, match=msg): encode_pixel_data(b"", **kwargs) diff --git a/src/lib.rs b/src/lib.rs index b5f5c74..d97e4a5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,6 +2,8 @@ use std::convert::TryFrom; use std::error::Error; +use bitvec::prelude::*; + use pyo3::prelude::*; use pyo3::wrap_pyfunction; use pyo3::types::{PyBytes, PyByteArray}; @@ -154,31 +156,32 @@ fn _decode_frame( String::from("'byteorder' must be '>' or '<'").into() ); - let mut is_bit_packed: bool = false; - - // Ensure we have a valid bits/px value: 1 or a multiple of 8 + // Check 'Bits Allocated' is 1 or a multiple of 8 + let mut is_bit_packed = false; + let mut bytes_per_pixel: u8; match bpp { 0 => return err_invalid_bits_allocated, - 1 => { is_bit_packed = true; }, + 1 => { + is_bit_packed = true; + bytes_per_pixel = 1; + }, _ => match bpp % 8 { - 0 => { }, - _ => return err_invalid_bits_allocated - } - } - - // For non-bit-packed data ensure `bytes_per_pixel` is in [1, 2, 4, 8] - if !is_bit_packed { - let bytes_per_pixel: u8 = bpp / 8; - match bytes_per_pixel { - 1 => {}, - 2 | 4 | 8 => match byteorder { - '>' | '<' => {}, - _ => return err_invalid_byteorder + 0 => { + bytes_per_pixel = bpp / 8; }, _ => return err_invalid_bits_allocated } - } else { + } + // Check `byteorder` is a valid character + // Check *Bits Allocated* is in [1, 8, 16, 32, 64] + match bytes_per_pixel { + 1 => {}, + 2 | 4 | 8 => match byteorder { + '>' | '<' => {}, + _ => return err_invalid_byteorder + }, + _ => return err_invalid_bits_allocated } // Parse the RLE header and check results @@ -187,6 +190,8 @@ fn _decode_frame( let encoded_length = src.len(); if encoded_length < 64 { return err_insufficient_data } + // Don't need to check the unwrap as we just checked + // there's enough data in `src` let header = <&[u8; 64]>::try_from(&src[0..64]).unwrap(); let all_offsets: [u32; 15] = _parse_header(header); @@ -252,19 +257,18 @@ fn _decode_frame( Might be a good idea to use bool arrays instead of u8 and implicitly unpack to avoid annoyances with multi-segment padding, then convert to u8 at the end. - - bitvec perhaps? */ // Decode each segment and place it into the vector // ------------------------------------------------ + // TODO: handle unwrap let pps = usize::try_from(nr_pixels).unwrap(); // Concatenate sample planes into a frame if !is_bit_packed { // Watch for overflow here; u32 * u32 -> u64 // Actual values are (u16 * u16) * u8 let expected_length = usize::try_from( - nr_pixels * u32::from(bytes_per_pixel * spp) + nr_pixels * nr_segments ).unwrap(); // Pre-allocate a vector for the decoded frame @@ -303,81 +307,69 @@ fn _decode_frame( } } - return Ok(frame); + return Ok(frame) + } // Bit-packed data - // u8 * u32 - // FIXME - let bits_per_segment = usize::try_from(8 * nr_pixels).unwrap(); // wrong! - // Each segment is padded to an even length, so segment boundaries should be - // whole integers - - // The number of whole bytes per segment, including any padding - let mut padding_bytes = 0; - match bits_per_segment % 16 { + // The number of whole bytes per segment after decoding + // TODO: handle unwraps + let mut bytes_per_segment = usize::try_from(nr_pixels / 8).unwrap(); + match nr_pixels % 8 { 0 => {}, - 1..=8 => { padding_bytes = 2 }, - 9..=15 => { padding_bytes = 1 }, + _ => { bytes_per_segment += 1); }, } - let bytes_per_segment = bits_per_segment / 8 + padding_bytes; let mut frame: Vec = Vec::new(); - for sample in 0..spp { // 0..0 or 0..2 + for idx in 0..(offsets.len() - 1) { // 0..1 or 0..3 // Start and end indices of the segment let start = usize::try_from(offsets[idx]).unwrap(); let end = usize::try_from(offsets[idx + 1]).unwrap(); - - // Decode the segment into the frame - let segment = _decode_segment_into_frame( + // TODO: match here on the unwrap + let segment = _decode_bit_packed_segment( <&[u8]>::try_from(&src[start..end]).unwrap(), - bytes_per_segment, + usize::from(bytes_per_segment), )?; - if segment.len() != bytes_per_segment { return err_segment_length } - if spp == 0 { return Ok(segment) } + if segment.len() != bytes_per_segment { return err_segment_length } - if !is_padded { - frame.extend(segment); - } else { - // hmm - } + if spp == 1 { return Ok(segment) } + frame.extend(segment); } - // Concatenate the segments together, bit shifting to ensure any padding is removed - - Ok(frame) -} - - -/* -fn _shift_bit_packed_segment( - prefix: u8, src: Vec, shift: u8 -) -> Vec { - assert!(0 < shift < 8); - - let mut dst = Vec::new(); - // shift the prefix to the right by the required shift - dst.push(prefix >> (8 - shift)) - - for idx in 0..src.len() { - dst[idx] = src[idx] << shift; - dst[idx] |= src[idx + 1] >> (8 - shift); + // Multiple samples but byte aligned + if nr_pixels % 8 == 0 { return Ok(frame) } + + // Multiple samples but not byte aligned + // -> need to remove the bit padding between segments + // TODO: handle unwraps + let mut bv = BitVec::<_, Msb0>::from_vec(frame); + let step = usize::try_from(nr_pixels).unwrap(); + for start in (step..bv.len() - 1).step_by(step) { + let end = start + (8 - usize::try_from(nr_pixels % 8).unwrap()); + let bv2 = bv.drain(start..end); } - dst + // Set any trailing bits to 0b0 + bv.set_uninitialized(false); + + Ok(bv.into_vec()) } -*/ fn _decode_bit_packed_segment( src: &[u8], segment_length: usize ) -> Result, Box> { - // + /* + + The returned data is guaranteed to be no more than segment_length in size. + */ let eod = src.len() - 1; let mut pos = 0; let mut header_byte: usize; let mut dst: Vec = Vec::new(); + let mut op_len: usize; + let mut idx: usize = 0; // number of bytes decoded let err_eod = Err( String::from( @@ -402,12 +394,13 @@ fn _decode_bit_packed_segment( // Check segment for excess padding if (idx + op_len) > segment_length { - dst.extend([src[pos]; segment_length - idx]); + dst.extend(vec![src[pos]; segment_length - idx]); return Ok(dst) } - dst.extend([src[pos]; op_len]); + dst.extend(vec![src[pos]; op_len]); + idx += op_len; pos += 1; } else if header_byte < 128 { // Extend by literally copying the next (N + 1) bytes @@ -418,13 +411,14 @@ fn _decode_bit_packed_segment( // Check segment for excess padding if (idx + op_len) > segment_length { - dst.extend(src[pos..pos + segment_length - idx]); + dst.extend(&src[pos..pos + segment_length - idx]); return Ok(dst) } - dst.extend(src[pos..pos + op_len]); + dst.extend(&src[pos..pos + op_len]); pos += header_byte + 1; + idx += op_len; } // header_byte == 128 is noop if pos >= eod { return Ok(dst) } @@ -673,12 +667,14 @@ fn _encode_frame( Required if bpp is greater than 1, '>' if `src` is in big endian byte order, '<' if little endian. */ + + // Pre-define our errors for neatness let err_invalid_nr_samples = Err( String::from("The (0028,0002) 'Samples per Pixel' must be 1 or 3").into() ); let err_invalid_bits_allocated = Err( String::from( - "The (0028,0100) 'Bits Allocated' value must be 8, 16, 32 or 64" + "The (0028,0100) 'Bits Allocated' value must be 1, 8, 16, 32 or 64" ).into() ); let err_invalid_nr_segments = Err( @@ -699,22 +695,31 @@ fn _encode_frame( String::from("'byteorder' must be '>' or '<'").into() ); - // Check 'Samples per Pixel' + // Check 'Samples per Pixel' is either 1 or 3 match spp { 1 | 3 => {}, _ => return err_invalid_nr_samples } - // Check 'Bits Allocated' + // Check 'Bits Allocated' is 1 or a multiple of 8 + let mut is_bit_packed = false; + let mut bytes_per_pixel: u8; match bpp { 0 => return err_invalid_bits_allocated, + 1 => { + is_bit_packed = true; + bytes_per_pixel = 1; + }, _ => match bpp % 8 { - 0 => {}, + 0 => { + bytes_per_pixel = bpp / 8; + }, _ => return err_invalid_bits_allocated } } - // Ensure `bytes_per_pixel` is in [1, 2, 4, 8] - let bytes_per_pixel: u8 = bpp / 8; + + // Check `byteorder` is a valid character + // Check *Bits Allocated* is in [1, 8, 16, 32, 64] match bytes_per_pixel { 1 => {}, 2 | 4 | 8 => match byteorder { @@ -725,11 +730,15 @@ fn _encode_frame( } // Ensure parameters are consistent + // TODO: handle unwrap let r = usize::try_from(rows).unwrap(); let c = usize::try_from(cols).unwrap(); - if src.len() != r * c * usize::from(spp * bytes_per_pixel) { - return err_invalid_parameters + // FIXME: add support for bit-packed data + if !is_bit_packed { + if src.len() != r * c * usize::from(spp * bytes_per_pixel) { + return err_invalid_parameters + } } let nr_segments: u8 = spp * bytes_per_pixel; @@ -766,6 +775,7 @@ fn _encode_frame( .cloned() .collect(); + // FIXME: `cols` probably not correct for bit-packed data _encode_segment_from_vector(segment, dst, cols)?; } From 5f3170898d5ee86c7d493e96e60d5040075c1964 Mon Sep 17 00:00:00 2001 From: scaramallion Date: Thu, 5 Jun 2025 15:10:36 +1000 Subject: [PATCH 3/9] Add release note --- docs/release_notes/v2.2.0.rst | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 docs/release_notes/v2.2.0.rst diff --git a/docs/release_notes/v2.2.0.rst b/docs/release_notes/v2.2.0.rst new file mode 100644 index 0000000..1a81007 --- /dev/null +++ b/docs/release_notes/v2.2.0.rst @@ -0,0 +1,9 @@ +.. _v2.2.0: + +2.2.0 +===== + +Enhancements +............ + +* Added support for encoding and decoding RLE data with a *Bits Allocated* of 1 From 62b5e722fb756290e6b7a1406cfc0a7104efa81d Mon Sep 17 00:00:00 2001 From: scaramallion Date: Thu, 4 Sep 2025 11:07:38 +1000 Subject: [PATCH 4/9] Probably before rethink --- Cargo.toml | 4 +- LICENSE | 2 +- pyproject.toml | 3 +- rle/utils.py | 10 ++- src/lib.rs | 190 ++++++++++++++++++++++++------------------------- 5 files changed, 101 insertions(+), 108 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index bec581f..a30f956 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pylibjpeg-rle" -version = "1.1.0" +version = "1.2.0" authors = ["scaramallion "] edition = "2024" exclude = [".github", "docs", ".codecov.yml", "asv.*", ".gitignore", ".coveragerc"] @@ -11,5 +11,5 @@ crate-type = ["cdylib"] [dependencies] -pyo3 = { version = "0.24.2", features = ["extension-module"] } +pyo3 = { version = "0.25.0", features = ["extension-module"] } bitvec = { version = "1.0.1" } diff --git a/LICENSE b/LICENSE index 62f3da2..133562c 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2020-2021 scaramallion +Copyright (c) 2020-2025 scaramallion Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/pyproject.toml b/pyproject.toml index e79a7b6..d4eca56 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", "Operating System :: MacOS :: MacOS X", "Operating System :: POSIX :: Linux", "Operating System :: Microsoft :: Windows", @@ -59,7 +60,7 @@ omit = [ ] [tool.mypy] -python_version = "3.8" +python_version = "3.10" files = "rle" exclude = ["rle/tests", "rle/benchmarks"] show_error_codes = true diff --git a/rle/utils.py b/rle/utils.py index c9e491f..376c084 100644 --- a/rle/utils.py +++ b/rle/utils.py @@ -26,8 +26,6 @@ def decode_pixel_data( ) -> Union[np.ndarray, bytearray]: """Return the decoded RLE Lossless data as a :class:`numpy.ndarray`. - Intended for use with *pydicom* ``Dataset`` objects. - Parameters ---------- src : bytes @@ -192,11 +190,11 @@ def encode_pixel_data( **kwargs If `ds` is not used then the following are required: - * ``'rows': int`` the number of rows contained in `src` - * ``'columns': int`` the number of columns contained in `src` - * ``samples_per_pixel': int`` the number of samples per pixel, either + * ``'rows'``: :class:`int` the number of rows contained in `src` + * ``'columns'``: :class:`int` the number of columns contained in `src` + * ``'samples_per_pixel'``: :class:`int` the number of samples per pixel, either 1 for monochrome or 3 for RGB or similar data. - * ``'bits_allocated': int`` the number of bits needed to contain each + * ``'bits_allocated'``: :class:`int` the number of bits needed to contain each pixel, either 1, 8, 16, 32 or 64. Returns diff --git a/src/lib.rs b/src/lib.rs index d97e4a5..aa04348 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,7 +2,7 @@ use std::convert::TryFrom; use std::error::Error; -use bitvec::prelude::*; +// use bitvec::prelude::*; use pyo3::prelude::*; use pyo3::wrap_pyfunction; @@ -60,7 +60,7 @@ fn _parse_header(src: &[u8; 64]) -> [u32; 15] { Parameters ---------- src - The 64 byte RLE header. + The 64 byte RLE header containing 15 little-endian ordered offset values. */ return [ u32::from_le_bytes([ src[4], src[5], src[6], src[7]]), @@ -158,7 +158,7 @@ fn _decode_frame( // Check 'Bits Allocated' is 1 or a multiple of 8 let mut is_bit_packed = false; - let mut bytes_per_pixel: u8; + let mut bytes_per_pixel: u8; // Valid values are 1 | 2 | 4 | 8 match bpp { 0 => return err_invalid_bits_allocated, 1 => { @@ -193,6 +193,7 @@ fn _decode_frame( // Don't need to check the unwrap as we just checked // there's enough data in `src` let header = <&[u8; 64]>::try_from(&src[0..64]).unwrap(); + // All 15 header offsets, however no guarantee they will be non-zero let all_offsets: [u32; 15] = _parse_header(header); // First offset must always be 64 @@ -221,13 +222,19 @@ fn _decode_frame( // Check the samples per pixel is conformant let spp: u8 = nr_segments / bytes_per_pixel; match spp { - 1 | 3 => {}, + 1 => {}, + 3 => { + // Bit packed data must be 1 sample per pixel + match bpp { + 1 => { return err_invalid_nr_samples }, + _ => {} + } + }, _ => return err_invalid_nr_samples } - /* - Example - ------- + /* Example + ---------- RLE encoded data is ordered like this (for 16-bit, 3 sample): Segment: 1 | 2 | 3 | 4 | 5 | 6 R MSB | R LSB | G MSB | G LSB | B MSB | B LSB @@ -242,23 +249,6 @@ fn _decode_frame( MSB LSB MSB LSB ... MSB LSB | MSB LSB MSB LSB ... MSB LSB | ... */ - /* - Bit-packed data - --------------- - - For bit-packed data (i.e. a *Bits Allocated* of 1), we decode the RLE encoding but - leave the bit-packing in place. This means the length of each decoded segment is not - equal to the number of pixels but instead the number of pixels / 8, rounded up to the - nearest whole integer with the difference being accounted for by padding 0b0 bits. - - Each decoded segment must have these padding bits removed and the final frame should be - the concatenated unpadded segments. Fortunately for bit-packed data there can be at - most 3 segments (with a *Samples per Pixel* of 3). - - Might be a good idea to use bool arrays instead of u8 and implicitly unpack to avoid - annoyances with multi-segment padding, then convert to u8 at the end. - */ - // Decode each segment and place it into the vector // ------------------------------------------------ // TODO: handle unwrap @@ -303,6 +293,7 @@ fn _decode_frame( usize::from(bytes_per_pixel), usize::from(byte_offset) + so )?; + if len != pps { return err_segment_length } } } @@ -310,50 +301,32 @@ fn _decode_frame( return Ok(frame) } - // Bit-packed data - // The number of whole bytes per segment after decoding - // TODO: handle unwraps - let mut bytes_per_segment = usize::try_from(nr_pixels / 8).unwrap(); - match nr_pixels % 8 { - 0 => {}, - _ => { bytes_per_segment += 1); }, - } - - let mut frame: Vec = Vec::new(); - for idx in 0..(offsets.len() - 1) { // 0..1 or 0..3 - // Start and end indices of the segment - let start = usize::try_from(offsets[idx]).unwrap(); - let end = usize::try_from(offsets[idx + 1]).unwrap(); - // TODO: match here on the unwrap - let segment = _decode_bit_packed_segment( - <&[u8]>::try_from(&src[start..end]).unwrap(), - usize::from(bytes_per_segment), - )?; - - if segment.len() != bytes_per_segment { return err_segment_length } - - if spp == 1 { return Ok(segment) } + /* Bit-packed data + ------------------ + For bit-packed data (i.e. a *Bits Allocated* of 1), we decode the RLE encoded data but + leave the bit-packing in place. This means the length of each decoded segment is not + equal to the number of pixels but instead the number of pixels / 8, rounded up to the + nearest whole integer, with the difference being accounted for by 0b0 padding bits. + */ - frame.extend(segment); + // The expected number of whole bytes per segment after decoding + let mut bytes_per_segment: usize; + match nr_pixels % 8 { + 0 => { bytes_per_segment = pps / 8; }, + _ => { bytes_per_segment = (pps + (8 - pps % 8)) / 8; }, } - // Multiple samples but byte aligned - if nr_pixels % 8 == 0 { return Ok(frame) } - - // Multiple samples but not byte aligned - // -> need to remove the bit padding between segments - // TODO: handle unwraps - let mut bv = BitVec::<_, Msb0>::from_vec(frame); - let step = usize::try_from(nr_pixels).unwrap(); - for start in (step..bv.len() - 1).step_by(step) { - let end = start + (8 - usize::try_from(nr_pixels % 8).unwrap()); - let bv2 = bv.drain(start..end); - } + // For bit-packed data we can only have 1 segment. + let start = usize::try_from(offsets[0]).unwrap(); + let end = usize::try_from(offsets[1]).unwrap(); + let segment = _decode_bit_packed_segment( + <&[u8]>::try_from(&src[start..end]).unwrap(), + bytes_per_segment, + )?; - // Set any trailing bits to 0b0 - bv.set_uninitialized(false); + if segment.len() != bytes_per_segment) { return err_segment_length } - Ok(bv.into_vec()) + return Ok(segment) } @@ -373,8 +346,7 @@ fn _decode_bit_packed_segment( let err_eod = Err( String::from( - "The end of the data was reached before the segment was \ - completely decoded" + "The end of the data was reached before the segment was completely decoded" ).into() ); @@ -625,7 +597,7 @@ fn encode_frame<'py>( spp : int The number of samples per pixel, supported values are 1 or 3. bpp : int - The number of bits per pixel, supported values are 8, 16, 32 and 64. + The number of bits per pixel, supported values are 1, 8, 16, 32 and 64. byteorder : str Required if `bpp` is greater than 1, '>' if `src` is in big endian byte order, '<' if little endian. @@ -651,8 +623,8 @@ fn _encode_frame( Parameters ---------- src - The data to be RLE encoded, ordered as R1, G1, B1, R2, G2, B2, ..., - Rn, Gn, Bn (i.e. Planar Configuration 0). + The data to be RLE encoded, with multi-sample data ordered as R1, G1, B1, + R2, G2, B2, ..., Rn, Gn, Bn (i.e. Planar Configuration 0). dst The vector storing the encoded data. rows @@ -660,9 +632,10 @@ fn _encode_frame( cols The number of columns in the data. spp - The number of samples per pixel, supported values are 1 or 3. + The number of samples per pixel, supported values are 1 or 3. May only be 1 if `bpp` + is 1. bpp - The number of bits per pixel, supported values are 8, 16, 32 and 64. + The number of bits per pixel, supported values are 1, 8, 16, 32 and 64. byteorder Required if bpp is greater than 1, '>' if `src` is in big endian byte order, '<' if little endian. @@ -696,26 +669,31 @@ fn _encode_frame( ); // Check 'Samples per Pixel' is either 1 or 3 - match spp { - 1 | 3 => {}, - _ => return err_invalid_nr_samples - } - // Check 'Bits Allocated' is 1 or a multiple of 8 + // Check 'Samples per Pixel' is 1 if 'Bits Allocated' is 1 let mut is_bit_packed = false; let mut bytes_per_pixel: u8; - match bpp { - 0 => return err_invalid_bits_allocated, + match spp { 1 => { - is_bit_packed = true; - bytes_per_pixel = 1; - }, - _ => match bpp % 8 { - 0 => { - bytes_per_pixel = bpp / 8; - }, - _ => return err_invalid_bits_allocated + match bpp { + 1 => { + is_bit_packed = true; + bytes_per_pixel = 1; + }, + 8 | 16 | 32 | 64 => { + bytes_per_pixel = bpp / 8; + }, + _ => { return err_invalid_bits_allocated } + } } + 3 => { + match bpp { + 1 => { return err_invalid_nr_samples }, + 8 | 16 | 32 | 64 => { bytes_per_pixel = bpp / 8; }, + _ => { return err_invalid_bits_allocated } + } + }, + _ => return err_invalid_nr_samples } // Check `byteorder` is a valid character @@ -734,11 +712,17 @@ fn _encode_frame( let r = usize::try_from(rows).unwrap(); let c = usize::try_from(cols).unwrap(); - // FIXME: add support for bit-packed data + let total_pixels = r * c * usize::from(spp); if !is_bit_packed { - if src.len() != r * c * usize::from(spp * bytes_per_pixel) { - return err_invalid_parameters + let total_length = total_pixels * usize::from(bytes_per_pixel); + if src.len() != total_length { return err_invalid_parameters } + } else { + let mut total_length: usize; + match total_pixels % 8 { + 0 => { total_length = total_pixels / 8; }, + _ => { total_length = (total_pixels + (8 - total_pixels % 8)) / 8; } } + if src.len() != total_length { return err_invalid_parameters } } let nr_segments: u8 = spp * bytes_per_pixel; @@ -749,10 +733,11 @@ fn _encode_frame( dst.extend(u32::from(nr_segments).to_le_bytes().to_vec()); dst.extend([0u8; 60].to_vec()); - // A vector of the start indexes used when segmenting - default big endian + // A vector of the start indexes used when segmenting + // Start with big-endian ordered pixel sample values let mut start_indices: Vec = (0..usize::from(nr_segments)).collect(); if byteorder != '>' { - // `src` has little endian byte ordering + // Typically `src` uses little endian byte ordering for idx in 0..spp { let s = usize::from(idx * bytes_per_pixel); let e = usize::from((idx + 1) * bytes_per_pixel); @@ -761,6 +746,8 @@ fn _encode_frame( } // Encode the data and update the RLE header segment offsets + // Segments are ordered from most significant byte to least significant for + // multi-byte values for idx in 0..usize::from(nr_segments) { // Update RLE header: convert current offset to 4x le ordered u8s let current_offset = (u32::try_from(dst.len()).unwrap()).to_le_bytes(); @@ -769,14 +756,21 @@ fn _encode_frame( } // Encode! Note the offset start of the `src` iter - let segment: Vec = src[start_indices[idx]..] - .into_iter() - .step_by(usize::from(spp * bytes_per_pixel)) - .cloned() - .collect(); - - // FIXME: `cols` probably not correct for bit-packed data - _encode_segment_from_vector(segment, dst, cols)?; + if !is_bit_packed { + let segment: Vec = src[start_indices[idx]..] + .into_iter() + .step_by(usize::from(spp * bytes_per_pixel)) + .cloned() + .collect(); + + _encode_segment_from_vector(segment, dst, cols)?; + } else { + // Should only ever be 1 sample per pixel -> 1 segment + // cols is wrong here, should be / 8 + // Also the DICOM Standard says each row should be encoded separately, + // what do we do if the number of columns isn't divisble by 8? + _encode_segment_from_vector(src, dst, cols)?; + } } Ok(()) From 0f611904d30cac189ffb4f886f5059a23dc10826 Mon Sep 17 00:00:00 2001 From: scaramallion Date: Fri, 5 Sep 2025 16:06:09 +1000 Subject: [PATCH 5/9] Rework to fit standard, add pack_bits and unpack_bits --- .github/workflows/pytest-builds.yml | 5 +- .github/workflows/release-wheels.yml | 127 ++++++----- Cargo.lock | 90 ++------ Cargo.toml | 3 +- pyproject.toml | 6 +- rle/__init__.py | 9 +- rle/tests/test_decode.py | 132 +++--------- rle/tests/test_encode.py | 12 +- rle/tests/test_utils.py | 305 ++++++++++++++++++++++++++- rle/utils.py | 132 +++++++++--- src/lib.rs | 298 ++++++++++++++++---------- 11 files changed, 750 insertions(+), 369 deletions(-) diff --git a/.github/workflows/pytest-builds.yml b/.github/workflows/pytest-builds.yml index b9bfd80..4022531 100644 --- a/.github/workflows/pytest-builds.yml +++ b/.github/workflows/pytest-builds.yml @@ -12,11 +12,11 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.10', '3.11', '3.12', '3.13'] + python-version: ['3.10', '3.11', '3.12', '3.13', '3.14'] os: [ubuntu-latest, windows-latest, macos-latest] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: fetch-depth: 2 @@ -24,6 +24,7 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} + allow-prereleases: true - name: Install Rust (stable) run: diff --git a/.github/workflows/release-wheels.yml b/.github/workflows/release-wheels.yml index c8c878d..a3e7cf9 100644 --- a/.github/workflows/release-wheels.yml +++ b/.github/workflows/release-wheels.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 10 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: submodules: true @@ -58,6 +58,9 @@ jobs: - os: windows-latest python: 313 platform_id: win32 + - os: windows-latest + python: 314 + platform_id: win32 # Windows 64 bit - os: windows-latest @@ -72,6 +75,9 @@ jobs: - os: windows-latest python: 313 platform_id: win_amd64 + - os: windows-latest + python: 314 + platform_id: win_amd64 # Linux 64 bit manylinux2014 - os: ubuntu-latest @@ -90,6 +96,10 @@ jobs: python: 313 platform_id: manylinux_x86_64 manylinux_image: manylinux2014 + - os: ubuntu-latest + python: 314 + platform_id: manylinux_x86_64 + manylinux_image: manylinux2014 # Linux aarch64 - os: ubuntu-latest @@ -104,6 +114,9 @@ jobs: - os: ubuntu-latest python: 313 platform_id: manylinux_aarch64 + - os: ubuntu-latest + python: 314 + platform_id: manylinux_aarch64 # MacOS x86_64 - os: macos-latest @@ -118,6 +131,9 @@ jobs: - os: macos-latest python: 313 platform_id: macosx_x86_64 + - os: macos-latest + python: 314 + platform_id: macosx_x86_64 # MacOS arm64 - os: macos-latest @@ -132,9 +148,12 @@ jobs: - os: macos-latest python: 313 platform_id: macosx_arm64 + - os: macos-latest + python: 314 + platform_id: macosx_arm64 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 with: submodules: true @@ -157,7 +176,7 @@ jobs: run: | python -m pip install -U pip python -m pip install -U setuptools-rust - python -m pip install cibuildwheel>=2.23 + python -m pip install cibuildwheel>=3.1.3 - name: Build wheels env: @@ -179,63 +198,63 @@ jobs: name: wheel-${{ matrix.python }}-${{ matrix.platform_id }} path: ./dist - test-package: - name: Test built package - needs: [ build-wheels, build-sdist ] - runs-on: ubuntu-latest - timeout-minutes: 30 - strategy: - fail-fast: false - matrix: - python-version: ['3.10', '3.11', '3.12', '3.13'] - - steps: - - name: Install Rust (stable) - run: - curl https://sh.rustup.rs -sSf | sh -s -- -y - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - - name: Download the wheels - uses: actions/download-artifact@v4 - with: - path: dist/ - merge-multiple: true - - - name: Install from package wheels and test - run: | - python -m venv testwhl - source testwhl/bin/activate - python -m pip install -U pip - python -m pip install -U pytest pydicom pylibjpeg - python -m pip uninstall -y pylibjpeg-rle - python -m pip install git+https://github.com/pydicom/pylibjpeg-data - python -m pip install -U --pre --find-links dist/ pylibjpeg-rle - python -m pytest --pyargs rle.tests - deactivate - - - name: Install from package tarball and test - run: | - python -m venv testsrc - source testsrc/bin/activate - python -m pip install -U pip - python -m pip install -U pytest pydicom pylibjpeg - python -m pip uninstall -y pylibjpeg-rle - python -m pip install git+https://github.com/pydicom/pylibjpeg-data - export PATH="$PATH:$HOME/.cargo/bin" - python -m pip install -U dist/pylibjpeg*rle-*.tar.gz - python -m pytest --pyargs rle.tests - deactivate + # test-package: + # name: Test built package + # needs: [ build-wheels, build-sdist ] + # runs-on: ubuntu-latest + # timeout-minutes: 30 + # strategy: + # fail-fast: false + # matrix: + # python-version: ['3.10', '3.11', '3.12', '3.13', '3.14'] + # + # steps: + # - name: Install Rust (stable) + # run: + # curl https://sh.rustup.rs -sSf | sh -s -- -y + # + # - name: Set up Python ${{ matrix.python-version }} + # uses: actions/setup-python@v5 + # with: + # python-version: ${{ matrix.python-version }} + # + # - name: Download the wheels + # uses: actions/download-artifact@v4 + # with: + # path: dist/ + # merge-multiple: true + # + # - name: Install from package wheels and test + # run: | + # python -m venv testwhl + # source testwhl/bin/activate + # python -m pip install -U pip + # python -m pip install -U pytest pydicom pylibjpeg + # python -m pip uninstall -y pylibjpeg-rle + # python -m pip install git+https://github.com/pydicom/pylibjpeg-data + # python -m pip install -U --pre --find-links dist/ pylibjpeg-rle + # python -m pytest --pyargs rle.tests + # deactivate + # + # - name: Install from package tarball and test + # run: | + # python -m venv testsrc + # source testsrc/bin/activate + # python -m pip install -U pip + # python -m pip install -U pytest pydicom pylibjpeg + # python -m pip uninstall -y pylibjpeg-rle + # python -m pip install git+https://github.com/pydicom/pylibjpeg-data + # export PATH="$PATH:$HOME/.cargo/bin" + # python -m pip install -U dist/pylibjpeg*rle-*.tar.gz + # python -m pytest --pyargs rle.tests + # deactivate # The pypi upload fails with non-linux containers, so grab the uploaded # artifacts and run using those # See: https://github.com/pypa/gh-action-pypi-publish/discussions/15 deploy: name: Upload wheels to PyPI - needs: [ test-package ] + needs: [ build-wheels ] runs-on: ubuntu-latest environment: name: pypi diff --git a/Cargo.lock b/Cargo.lock index fcc00f6..354d199 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,33 +4,9 @@ version = 4 [[package]] name = "autocfg" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" - -[[package]] -name = "bitvec" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" -dependencies = [ - "funty", - "radium", - "tap", - "wyz", -] - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "funty" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "heck" @@ -46,9 +22,9 @@ checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" [[package]] name = "libc" -version = "0.2.172" +version = "0.2.175" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" +checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" [[package]] name = "memoffset" @@ -67,34 +43,32 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "portable-atomic" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" +checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" [[package]] name = "proc-macro2" -version = "1.0.95" +version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" dependencies = [ "unicode-ident", ] [[package]] name = "pylibjpeg-rle" -version = "1.1.0" +version = "1.2.0" dependencies = [ - "bitvec", "pyo3", ] [[package]] name = "pyo3" -version = "0.24.2" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5203598f366b11a02b13aa20cab591229ff0a89fd121a308a5df751d5fc9219" +checksum = "7ba0117f4212101ee6544044dae45abe1083d30ce7b29c4b5cbdfa2354e07383" dependencies = [ - "cfg-if", "indoc", "libc", "memoffset", @@ -108,19 +82,18 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.24.2" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99636d423fa2ca130fa5acde3059308006d46f98caac629418e53f7ebb1e9999" +checksum = "4fc6ddaf24947d12a9aa31ac65431fb1b851b8f4365426e182901eabfb87df5f" dependencies = [ - "once_cell", "target-lexicon", ] [[package]] name = "pyo3-ffi" -version = "0.24.2" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78f9cf92ba9c409279bc3305b5409d90db2d2c22392d443a87df3a1adad59e33" +checksum = "025474d3928738efb38ac36d4744a74a400c901c7596199e20e45d98eb194105" dependencies = [ "libc", "pyo3-build-config", @@ -128,9 +101,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.24.2" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b999cb1a6ce21f9a6b147dcf1be9ffedf02e0043aec74dc390f3007047cecd9" +checksum = "2e64eb489f22fe1c95911b77c44cc41e7c19f3082fc81cce90f657cdc42ffded" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -140,9 +113,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.24.2" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "822ece1c7e1012745607d5cf0bcb2874769f0f7cb34c4cde03b9358eb9ef911a" +checksum = "100246c0ecf400b475341b8455a9213344569af29a3c841d29270e53102e0fcf" dependencies = [ "heck", "proc-macro2", @@ -160,29 +133,17 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "radium" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" - [[package]] name = "syn" -version = "2.0.101" +version = "2.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] -[[package]] -name = "tap" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" - [[package]] name = "target-lexicon" version = "0.13.2" @@ -200,12 +161,3 @@ name = "unindent" version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" - -[[package]] -name = "wyz" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" -dependencies = [ - "tap", -] diff --git a/Cargo.toml b/Cargo.toml index a30f956..144a8ed 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,5 +11,4 @@ crate-type = ["cdylib"] [dependencies] -pyo3 = { version = "0.25.0", features = ["extension-module"] } -bitvec = { version = "1.0.1" } +pyo3 = { version = "0.26.0", features = ["extension-module"] } diff --git a/pyproject.toml b/pyproject.toml index d4eca56..ab085eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,9 +54,13 @@ homepage = "https://github.com/pydicom/pylibjpeg-rle" [project.entry-points."pylibjpeg.pixel_data_encoders"] "1.2.840.10008.1.2.5" = "rle:encode_pixel_data" +[project.entry-points."pylibjpeg.utils"] +"pack_bits" = "rle:pack_bits" +"unpack_bits" = "rle:unpack_bits" + [tool.coverage.run] omit = [ - "libjpeg/tests/*", + "rle/tests/*", ] [tool.mypy] diff --git a/rle/__init__.py b/rle/__init__.py index 8a79be0..2b21cf7 100644 --- a/rle/__init__.py +++ b/rle/__init__.py @@ -1,4 +1,11 @@ """Set package shortcuts.""" from rle._version import __version__ -from rle.utils import pixel_array, generate_frames, decode_pixel_data, encode_pixel_data +from rle.utils import ( + pixel_array, + generate_frames, + decode_pixel_data, + encode_pixel_data, + pack_bits, + unpack_bits, +) diff --git a/rle/tests/test_decode.py b/rle/tests/test_decode.py index 13247dd..2f7368b 100644 --- a/rle/tests/test_decode.py +++ b/rle/tests/test_decode.py @@ -126,6 +126,28 @@ def test_invalid_samples_px_raises(self): with pytest.raises(ValueError, match=msg): decode_frame(d + b"\x00" * 8, 1, 8, "<") + # Bits Allocated 1 must be Samples per Pixel 1 + header = ( + b"\x03\x00\x00\x00" # 3 segments + b"\x40\x00\x00\x00" # 64 + b"\x47\x00\x00\x00" # 71 + b"\x4E\x00\x00\x00" # 78 + ) + header += (64 - len(header)) * b"\x00" + # 2 x 3 data + # 0, 64, 128, 160, 192, 255 + data = ( + b"\x05\x00\x40\x80\xA0\xC0\xFF" # R + b"\x05\xFF\xC0\x80\x40\x00\xFF" # B + b"\x05\x01\x40\x80\xA0\xC0\xFE" # G + ) + msg = ( + r"The \(0028,0002\) 'Samples per Pixel' must be 1 if \(0028,0100\) 'Bits " + r"Allocated' is 1" + ) + with pytest.raises(ValueError, match=msg): + decoded = decode_frame(header + data, 2 * 3, 1, "<") + def test_insufficient_frame_literal(self): """Test segment with excess padding on lit.""" d = self.as_bytes([64]) @@ -361,109 +383,19 @@ def test_u8_1s_bs1(self): """Test decoding bit packed 1 sample/px.""" header = b"\x01\x00\x00\x00\x40\x00\x00\x00" header += (64 - len(header)) * b"\x00" - # 0, 64, 128, 160, 192, 255 - data = b"\x05\x00\x40\x80\xA0\xC0\xFF" - # Big endian - # 48 px - byte aligned in 6 bytes - decoded = decode_frame(header + data, 6 * 8, 1, ">") - arr = np.frombuffer(decoded, np.dtype("uint8")) - assert [0, 64, 128, 160, 192, 255] == arr.tolist() - - # 37 px -> 5 bytes - decoded = decode_frame(header + data, 4 * 8 + 5, 1, ">") - arr = np.frombuffer(decoded, np.dtype("uint8")) - assert [0, 64, 128, 160, 192] == arr.tolist() - - # 2 px -> 1 byte - decoded = decode_frame(header + data, 2, 1, ">") - arr = np.frombuffer(decoded, np.dtype("uint8")) - assert [0] == arr.tolist() - - # Little-endian - decoded = decode_frame(header + data, 6 * 8, 1, "<") - arr = np.frombuffer(decoded, np.dtype("uint8")) - assert [0, 64, 128, 160, 192, 255] == arr.tolist() - - decoded = decode_frame(header + data, 4 * 8 + 5, 1, "<") - arr = np.frombuffer(decoded, np.dtype("uint8")) - assert [0, 64, 128, 160, 192] == arr.tolist() - - decoded = decode_frame(header + data, 2, 1, "<") - arr = np.frombuffer(decoded, np.dtype("uint8")) - assert [0] == arr.tolist() - - def test_u8_3s_bs1(self): - """Test decoding bit packed 3 sample/px.""" - header = ( - b"\x03\x00\x00\x00" # 3 segments - b"\x40\x00\x00\x00" # 64 - b"\x47\x00\x00\x00" # 71 - b"\x4E\x00\x00\x00" # 78 - ) - header += (64 - len(header)) * b"\x00" - # 0, 64, 128, 160, 192, 255 - data = ( - b"\x05\x00\x40\x80\xA0\xC0\xFF" # R - b"\x05\x7F\xC0\x80\x40\x00\xFF" # B - b"\x05\x01\x40\x80\xA0\xC0\xFE" # G - ) - # 48 px - byte aligned in 18 bytes - decoded = decode_frame(header + data, 6 * 8, 1, "<") + # 0 0 0 0 0 1 0 1 0 1 1 0 1 1 1 1 + data = b"\xFC\x00\x07\x01\x00\x01\x00\x01\x01\x00\x01\xFD\x01\x00" + decoded = decode_frame(header + data, 16, 1, ">") arr = np.frombuffer(decoded, np.dtype("uint8")) - # Ordered all R, all G, all B - assert [0, 64, 128, 160, 192, 255] == arr[:6].tolist() - assert [127, 192, 128, 64, 0, 255] == arr[6:12].tolist() - assert [1, 64, 128, 160, 192, 254] == arr[12:].tolist() + assert [0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1] == arr.tolist() + decoded = decode_frame(header + data, 16, 1, "<") + assert [0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1] == arr.tolist() - # 47 px - non-byte aligned in 18 bytes - decoded = decode_frame(header + data, 6 * 8 - 1, 1, "<") - arr = np.frombuffer(decoded, np.dtype("uint8")) - # Boundaries are 47 | 94 | 141 - # v removed - # 255 | 127: 0b[1111_111x 0b0]111_1111 -> 0b1111_1110 - assert [0, 64, 128, 160, 192, 254] == arr[:6].tolist() - # Left shift values by 1 bit - # 127 | 192: 0b0[111_1111 0b1]100_0000 -> 0b1111_1111 - # 192 | 128: 0b1[100_0000 0b1]000_0000 -> 0b1000_0001 - # 128 | 64: 0b1[000_0000 0b0]100_0000 -> 0b0000_0000 - # 64 | 0: 0b0[100_0000 0b0]000_0000 -> 0b1000_0000 - # 0 | 255: 0b0[000_0000 0b1]111_1111 -> 0b0000_0001 - # v removed - # 255 | 1: 0b1[111_111x 0b00]00_0001 -> 0b1111_1100 - assert [255, 129, 0, 128, 1, 252] == arr[6:12].tolist() - # Left shift values by 2 bits - # 1 | 64: 0b00[00_0001 0b01]00_0000 -> 0b0000_0101 - # 64 | 128: 0b01[00_0000 0b10]00_0000 -> 0b0000_0010 - # 128 | 160: 0b10[00_0000 0b10]10_0000 -> 0b0000_0010 - # 160 | 192: 0b10[10_0000 0b11]00_0000 -> 0b1000_0011 - # 192 | 254: 0b11[00_0000 0b11]11_1110 -> 0b0000_0011 - # v removed - # 254 | x: 0b11[11_111x -> 0b1111_1000 - assert [5, 2, 2, 131, 3, 248] == arr[12:].tolist() - - # 41 px - non-byte aligned in 16 bytes - decoded = decode_frame(header + data, 5 * 8 + 1, 1, "<") + # 0 0 0 0 0 1 0 1 0 1 1 0 1 1 1 + data = b"\xFC\x00\x07\x01\x00\x01\x00\x01\x01\x00\x01\xFE\x01\x00" + decoded = decode_frame(header + data, 15, 1, ">") arr = np.frombuffer(decoded, np.dtype("uint8")) - # Boundaries are 48 | 96 | 144 -> 41 | 82 | 123 - # vvv vvvv removed - # 255 | 255: 0b[1xxx_xxxx 0b0111_111]1 -> 0b1011_1111 - assert [0, 64, 128, 160, 192, 191] == arr[:6].tolist() - # Left shift values by 7 bits - # 255 | 192: 0b1111_111[1 0b1100_000]0 -> 0b1110_0000 - # 192 | 128: 0b1100_000[0 0b1000_000]0 -> 0b0100_0000 - # 128 | 64: 0b1000_000[0 0b0100_000]0 -> 0b0010_0000 - # 64 | 0: 0b0100_000[0 0b0000_000]0 -> 0b0000_0000 - # vvv vvvv removed - # 0 | 255 | 1: 0b0000_000[0 0b1xxx_xxxx 0b0000_00]01 -> 0b0100_0000 - assert [224, 64, 32, 0, 64] == arr[6:11].tolist() - # Left shift values by 14 bits - # 1 | 64: 0b0000_00[01 0b0100_00]00 -> 0b0101_0000 - # 64 | 128: 0b0100_00[00 0b1000_00]00 -> 0b0010_0000 - # 128 | 160: 0b1000_00[00 0b1010_00]00 -> 0b0010_1000 - # 160 | 192: 0b1010_00[00 0b1100_00]00 -> 0b0011_0000 - # vvv vvvv removed - # 192 | 254: 0b1100_[00 0b1]xxx_xxxx -> 0b0010_0000 - assert [80, 32, 40, 48, 32] == arr[11:].tolist() + assert [0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1] == arr.tolist() @pytest.mark.skipif(not HAVE_PYDICOM, reason="No pydicom") diff --git a/rle/tests/test_encode.py b/rle/tests/test_encode.py index 4548c10..d4b10eb 100644 --- a/rle/tests/test_encode.py +++ b/rle/tests/test_encode.py @@ -386,15 +386,19 @@ def test_invalid_samples_per_pixel_raises(self): with pytest.raises(ValueError, match=msg): encode_frame(b"", 1, 1, 4, 1, "<") + msg = ( + r"The \(0028,0002\) 'Samples per Pixel' must be 1 if \(0028,0100\) 'Bits " + "Allocated' is 1" + ) + with pytest.raises(ValueError, match=msg): + encode_frame(b"", 1, 1, 3, 1, "<") + def test_invalid_bits_per_pixel_raises(self): """Test exception raised if bits per pixel not valid.""" - msg = r"The \(0028,0100\) 'Bits Allocated' value must be 8, 16, 32 or 64" + msg = r"The \(0028,0100\) 'Bits Allocated' value must be 1, 8, 16, 32 or 64" with pytest.raises(ValueError, match=msg): encode_frame(b"", 1, 1, 1, 0, "<") - with pytest.raises(ValueError, match=msg): - encode_frame(b"", 1, 1, 1, 1, "<") - with pytest.raises(ValueError, match=msg): encode_frame(b"", 1, 1, 1, 7, "<") diff --git a/rle/tests/test_utils.py b/rle/tests/test_utils.py index 87c66ee..3fe248e 100644 --- a/rle/tests/test_utils.py +++ b/rle/tests/test_utils.py @@ -1,5 +1,7 @@ """Tests for the utils module.""" +from struct import pack, unpack + import numpy as np import pytest @@ -17,12 +19,44 @@ from pydicom.pixels.decoders.rle import _rle_decode_frame from rle.data import get_indexed_datasets -from rle.utils import encode_pixel_data, encode_array, pixel_data, pixel_array +from rle.utils import ( + decode_pixel_data, + encode_pixel_data, + encode_array, + pixel_data, + pixel_array, +) +from rle.rle import pack_bits, unpack_bits INDEX_LEE = get_indexed_datasets("1.2.840.10008.1.2.1") +class TestDecodePixelData: + """Tests for decode_pixel_data()""" + + def test_u8_1s_ba1(self): + """Tests bits allocated 1""" + header = b"\x01\x00\x00\x00\x40\x00\x00\x00" + header += (64 - len(header)) * b"\x00" + # 0 0 0 0 0 1 0 1 0 1 1 0 1 1 1 1 + data = b"\xFC\x00\x07\x01\x00\x01\x00\x01\x01\x00\x01\xFD\x01\x00" + src = header + data + opts = { + "rows": 1, + "columns": 16, + "bits_allocated": 1, + } + + frame = decode_pixel_data(src, version=2, **opts) + assert frame == ( + b"\x00\x00\x00\x00\x00\x01\x00\x01\x00\x01\x01\x00\x01\x01\x01\x01" + ) + opts["pack_bits"] = True + frame = decode_pixel_data(src, version=2, **opts) + assert frame == b"\xA0\xF6" + + @pytest.mark.skipif(not HAVE_PYDICOM, reason="no pydicom") class TestEncodeArray: """Tests for utils.encode_array().""" @@ -234,6 +268,42 @@ def test_too_many_segments_raises(self): with pytest.raises(ValueError, match=msg): encode_pixel_data(b"", **kwargs) + def test_u8_1s_ba1(self): + """Tests bits allocated 1""" + opts = { + "rows": 1, + "columns": 16, + "bits_allocated": 1, + "samples_per_pixel": 1, + } + # 0 0 0 0 0 1 0 1 0 1 1 0 1 1 1 1 + enc = encode_pixel_data(b"\xA0\xF6", **opts) + + header = b"\x01\x00\x00\x00\x40\x00\x00\x00" + header += (64 - len(header)) * b"\x00" + data = b"\xFC\x00\x03\x01\x00\x01\x00\xFF\x01\x00\x00\xFD\x01\x00" + assert enc == header + data + assert decode_pixel_data(enc, version=2, **opts) == ( + b"\x00\x00\x00\x00\x00\x01\x00\x01\x00\x01\x01\x00\x01\x01\x01\x01" + ) + + opts = { + "rows": 1, + "columns": 12, + "bits_allocated": 1, + "samples_per_pixel": 1, + } + # 0 0 0 0 0 1 0 1 0 1 1 0 + enc = encode_pixel_data(b"\xA0\xF6", **opts) + + header = b"\x01\x00\x00\x00\x40\x00\x00\x00" + header += (64 - len(header)) * b"\x00" + data = b"\xFC\x00\x03\x01\x00\x01\x00\xFF\x01\x00\x00\x00" + assert enc == header + data + assert decode_pixel_data(enc, version=2, **opts) == ( + b"\x00\x00\x00\x00\x00\x01\x00\x01\x00\x01\x01\x00" + ) + @pytest.mark.skipif(not HAVE_PYDICOM, reason="no pydicom") class TestPixelData: @@ -249,3 +319,236 @@ def test_pixel_data(self): ds.PixelData = data assert np.array_equal(ref, pixel_array(ds)) + + +REFERENCE_PACK_UNPACK = [ + # src, little, big + (b"", [], []), + (b"\x00", [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0]), + (b"\x01", [1, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1]), + (b"\x02", [0, 1, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 1, 0]), + (b"\x04", [0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0]), + (b"\x08", [0, 0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 0, 1, 0, 0, 0]), + (b"\x10", [0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0, 0]), + (b"\x20", [0, 0, 0, 0, 0, 1, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0]), + (b"\x40", [0, 0, 0, 0, 0, 0, 1, 0], [0, 1, 0, 0, 0, 0, 0, 0]), + (b"\x80", [0, 0, 0, 0, 0, 0, 0, 1], [1, 0, 0, 0, 0, 0, 0, 0]), + (b"\xaa", [0, 1, 0, 1, 0, 1, 0, 1], [1, 0, 1, 0, 1, 0, 1, 0]), + (b"\xf0", [0, 0, 0, 0, 1, 1, 1, 1], [1, 1, 1, 1, 0, 0, 0, 0]), + (b"\x0f", [1, 1, 1, 1, 0, 0, 0, 0], [0, 0, 0, 0, 1, 1, 1, 1]), + (b"\xff", [1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1]), + ( + b"\x00\x00", + #| 1st byte | 2nd byte + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ), + ( + b"\x00\x01", + #| 1st byte | 2nd byte + [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1] + ), + ( + b"\x00\x80", + #| 1st byte | 2nd byte + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0], + ), + ( + b"\x00\xff", + #| 1st byte | 2nd byte + [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], + ), + ( + b"\x01\x80", + #| 1st byte | 2nd byte + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], + [0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0], + ), + ( + b"\x80\x80", + #| 1st byte | 2nd byte + [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1], + [1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0], + ), + ( + b"\xff\x80", + #| 1st byte | 2nd byte + [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], + ), +] + + +class TestUnpackBits: + """Tests for unpack_bits().""" + + @pytest.mark.parametrize("src, little, big", REFERENCE_PACK_UNPACK) + def test_unpack_bytes(self, src, little, big): + """Test unpacking data without numpy.""" + as_bytes = pack(f"{len(little)}B", *little) + assert unpack_bits(src, 0, "<") == as_bytes + assert unpack_bits(src, 32, "<") == as_bytes + as_bytes = pack(f"{len(big)}B", *big) + assert unpack_bits(src, 0, ">") == as_bytes + assert unpack_bits(src, 32, ">") == as_bytes + + def test_count_little(self): + """Test the `count` parameter for little endian unpacking.""" + assert unpack_bits(b"\x00", 1, "<") == b"\x00" + assert unpack_bits(b"\xff", 1, "<") == b"\x01" + assert unpack_bits(b"\xff", 2, "<") == b"\x01" * 2 + assert unpack_bits(b"\xff", 3, "<") == b"\x01" * 3 + assert unpack_bits(b"\xff", 4, "<") == b"\x01" * 4 + assert unpack_bits(b"\xff", 5, "<") == b"\x01" * 5 + assert unpack_bits(b"\xff", 6, "<") == b"\x01" * 6 + assert unpack_bits(b"\xff", 7, "<") == b"\x01" * 7 + assert unpack_bits(b"\xff", 8, "<") == b"\x01" * 8 + assert unpack_bits(b"\xff\xAA", 9, "<") == b"\x01" * 8 + b"\x00" + assert unpack_bits(b"\xff\xAA", 10, "<") == b"\x01" * 8 + b"\x00\x01" + assert unpack_bits(b"\xff\xAA", 11, "<") == b"\x01" * 8 + b"\x00\x01\x00" + assert unpack_bits(b"\xff\xAA", 12, "<") == b"\x01" * 8 + b"\x00\x01" * 2 + assert unpack_bits(b"\xff\xAA", 13, "<") == b"\x01" * 8 + b"\x00\x01" * 2 + b"\x00" + assert unpack_bits(b"\xff\xAA", 14, "<") == b"\x01" * 8 + b"\x00\x01" * 3 + assert unpack_bits(b"\xff\xAA", 15, "<") == b"\x01" * 8 + b"\x00\x01" * 3 + b"\x00" + assert unpack_bits(b"\xff\xAA", 16, "<") == b"\x01" * 8 + b"\x00\x01" * 4 + + def test_count_big(self): + """Test the `count` parameter for big endian unpacking.""" + assert unpack_bits(b"\x00", 1, ">") == b"\x00" + assert unpack_bits(b"\xff", 1, ">") == b"\x01" + assert unpack_bits(b"\xff", 2, ">") == b"\x01" * 2 + assert unpack_bits(b"\xff", 3, ">") == b"\x01" * 3 + assert unpack_bits(b"\xff", 4, ">") == b"\x01" * 4 + assert unpack_bits(b"\xff", 5, ">") == b"\x01" * 5 + assert unpack_bits(b"\xff", 6, ">") == b"\x01" * 6 + assert unpack_bits(b"\xff", 7, ">") == b"\x01" * 7 + assert unpack_bits(b"\xff", 8, ">") == b"\x01" * 8 + assert unpack_bits(b"\xff\xAA", 9, ">") == b"\x01" * 8 + b"\x01" + assert unpack_bits(b"\xff\xAA", 10, ">") == b"\x01" * 8 + b"\x01\x00" + assert unpack_bits(b"\xff\xAA", 11, ">") == b"\x01" * 8 + b"\x01\x00\x01" + assert unpack_bits(b"\xff\xAA", 12, ">") == b"\x01" * 8 + b"\x01\x00" * 2 + assert unpack_bits(b"\xff\xAA", 13, ">") == b"\x01" * 8 + b"\x01\x00" * 2 + b"\x01" + assert unpack_bits(b"\xff\xAA", 14, ">") == b"\x01" * 8 + b"\x01\x00" * 3 + assert unpack_bits(b"\xff\xAA", 15, ">") == b"\x01" * 8 + b"\x01\x00" * 3 + b"\x01" + assert unpack_bits(b"\xff\xAA", 16, ">") == b"\x01" * 8 + b"\x01\x00" * 4 + + @pytest.mark.parametrize("src, little, big", REFERENCE_PACK_UNPACK) + def test_unpack_bytearray(self, src, little, big): + """Test unpacking data without numpy.""" + as_bytes = pack(f"{len(little)}B", *little) + assert unpack_bits(bytearray(src), 0, "<") == as_bytes + as_bytes = pack(f"{len(big)}B", *big) + assert unpack_bits(bytearray(src), 0, ">") == as_bytes + + +REFERENCE_PACK_PARTIAL_LITTLE = [ + # | 1st byte | 2nd byte + (b"\x00\x40", [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]), # 15-bits + (b"\x00\x20", [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]), + (b"\x00\x10", [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]), + (b"\x00\x08", [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]), + (b"\x00\x04", [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]), + (b"\x00\x02", [0, 0, 0, 0, 0, 0, 0, 0, 0, 1]), + (b"\x00\x01", [0, 0, 0, 0, 0, 0, 0, 0, 1]), # 9-bits + (b"\x80", [0, 0, 0, 0, 0, 0, 0, 1]), # 8-bits + (b"\x40", [0, 0, 0, 0, 0, 0, 1]), + (b"\x20", [0, 0, 0, 0, 0, 1]), + (b"\x10", [0, 0, 0, 0, 1]), + (b"\x08", [0, 0, 0, 1]), + (b"\x04", [0, 0, 1]), + (b"\x02", [0, 1]), + (b"\x01", [1]), + (b"", []), +] +REFERENCE_PACK_PARTIAL_BIG = [ + # | 1st byte | 2nd byte + (b"\x00\x02", [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]), # 15-bits + (b"\x00\x04", [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]), + (b"\x00\x08", [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]), + (b"\x00\x10", [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]), + (b"\x00\x20", [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]), + (b"\x00\x40", [0, 0, 0, 0, 0, 0, 0, 0, 0, 1]), + (b"\x00\x80", [0, 0, 0, 0, 0, 0, 0, 0, 1]), # 9-bits + (b"\x01", [0, 0, 0, 0, 0, 0, 0, 1]), # 8-bits + (b"\x02", [0, 0, 0, 0, 0, 0, 1]), + (b"\x04", [0, 0, 0, 0, 0, 1]), + (b"\x08", [0, 0, 0, 0, 1]), + (b"\x10", [0, 0, 0, 1]), + (b"\x20", [0, 0, 1]), + (b"\x40", [0, 1]), + (b"\x80", [1]), + (b"", []), +] + + +class TestPackBits: + """Tests for pack_bits().""" + + @pytest.mark.parametrize("output, little, big", REFERENCE_PACK_UNPACK) + def test_pack_bytes(self, output, little, big): + """Test packing data.""" + assert output == pack_bits(bytes(little), "<") + assert output == pack_bits(bytes(big), ">") + + @pytest.mark.parametrize("output, little, big", REFERENCE_PACK_UNPACK) + def test_pack_bytearray(self, output, little, big): + """Test packing data.""" + assert output == pack_bits(bytearray(little), "<") + assert output == pack_bits(bytearray(big), ">") + + def test_non_binary_input(self): + """Test non-binary input raises exception.""" + msg = r"Only binary input \(containing zeros or ones\) can be packed" + with pytest.raises(ValueError, match=msg): + pack_bits(b"\x00\x00\x02\x00\x00\x00\x00\x00", "<") + + def test_bytes_input(self): + """Repeat above test with bytes input.""" + # fmt: off + src = bytes( + [ + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 1, 1, 1, 1, 1, 1, 1, + ] + ) + # fmt: on + assert b"\x00\x55\xff" == pack_bits(src, "<") + assert b"\x00\xAA\xff" == pack_bits(src, ">") + + def test_bytearry_input(self): + """Repeat above test with bytearray input.""" + # fmt: off + src = bytearray( + [ + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 1, 0, 1, 0, 1, 0, + 1, 1, 1, 1, 1, 1, 1, 1, + ] + ) + # fmt: on + assert b"\x00\x55\xff" == pack_bits(src, "<") + assert b"\x00\xAA\xff" == pack_bits(src, ">") + + @pytest.mark.parametrize("output, src", REFERENCE_PACK_PARTIAL_LITTLE) + def test_pack_partial_bytes(self, src, output): + """Test packing data that isn't a full byte long.""" + assert output == pack_bits(bytes(src), "<") + + @pytest.mark.parametrize("output, src", REFERENCE_PACK_PARTIAL_LITTLE) + def test_pack_partial_bytearray(self, src, output): + """Test packing data that isn't a full byte long.""" + assert output == pack_bits(bytearray(src), "<") + + @pytest.mark.parametrize("output, src", REFERENCE_PACK_PARTIAL_BIG) + def test_pack_partial_bytes_big(self, src, output): + """Test packing data that isn't a full byte long.""" + assert output == pack_bits(bytes(src), ">") + + @pytest.mark.parametrize("output, src", REFERENCE_PACK_PARTIAL_BIG) + def test_pack_partial_bytearray_big(self, src, output): + """Test packing data that isn't a full byte long.""" + assert output == pack_bits(bytearray(src), ">") diff --git a/rle/utils.py b/rle/utils.py index 376c084..d15c277 100644 --- a/rle/utils.py +++ b/rle/utils.py @@ -1,12 +1,19 @@ """Utility functions.""" import enum +import math import sys -from typing import Iterator, Optional, Any, TYPE_CHECKING, cast, Union +from typing import Iterator, Any, TYPE_CHECKING, cast import numpy as np -from rle.rle import decode_frame, decode_segment, encode_frame, encode_segment +from rle.rle import ( + decode_frame, + decode_segment, + encode_frame, + encode_segment, +) +from rle.rle import pack_bits as _pack_bits, unpack_bits as _unpack_bits if TYPE_CHECKING: # pragma: no cover @@ -20,11 +27,16 @@ class Version(enum.IntEnum): def decode_pixel_data( src: bytes, - ds: Optional["Dataset"] = None, + ds: "Dataset | None" = None, version: int = Version.v1, **kwargs: Any, -) -> Union[np.ndarray, bytearray]: - """Return the decoded RLE Lossless data as a :class:`numpy.ndarray`. +) -> np.ndarray | bytearray: + """Return the decoded RLE Lossless data as a :class:`numpy.ndarray` or + :class:`bytearray`. + + .. versionchanged:: 2.2 + + Added the ``"pack_bits"`` decoding option and support for *Bits Allocated* 1. Parameters ---------- @@ -33,7 +45,7 @@ def decode_pixel_data( ds : pydicom.dataset.Dataset, optional A :class:`~pydicom.dataset.Dataset` containing the group ``0x0028`` elements corresponding to the image frame. If not used then `kwargs` - must be supplied. Only used with version ``1``. + must be supplied. Only used with `version` ``1``. version : int, optional * If ``1`` (default) then return the image data as an :class:`numpy.ndarray` @@ -41,22 +53,26 @@ def decode_pixel_data( **kwargs Required keys if `ds` is not supplied or if version is ``2``: - * ``"rows"``: :class:`int` - the number of rows in the decoded image + * ``"rows"``: :class:`int` - the number of rows in the decoded image. * ``"columns"``: :class:`int` - the number of columns in the decoded - image + image. * ``"bits_allocated"``: :class:`int` - the number of bits allocated - to each pixel + to each pixel. Current decoding options are: - * ``{'byteorder': str}`` specify the byte ordering for the decoded data - when more than 8 bits per pixel are used, should be '<' for little - endian ordering (default) or '>' for big-endian ordering. + * ``"byteorder"``: :class:`str` - specify the byte ordering for the decoded + data when more than 8 bits per pixel are used, should be ``'<'`` for little + endian ordering (default) or ``'>'`` for big-endian ordering. + * ``"pack_bits"``: :class:`bool` - (`version` 2 only) if ``True`` and + ``"bits_allocated"`` is ``1`` then return the decoded data in its packed + form (8 pixels per byte), otherwise return the decoded data in its unpacked + form (1 pixel per byte). Default ``False``. Returns ------- bytearray | numpy.ndarray - The image data as either a bytearray or ndarray. + The image data as either a bytearray (`version` 2) or ndarray (`version` 1). Raises ------ @@ -93,12 +109,18 @@ def decode_pixel_data( rows = cast(int, kwargs.get("rows")) bits_allocated = cast(int, kwargs.get("bits_allocated")) byteorder = kwargs.get("byteorder", "<") + as_packed = kwargs.get("pack_bits", False) + + frame: bytearray = decode_frame(src, rows * columns, bits_allocated, byteorder) - return cast(bytearray, decode_frame(src, rows * columns, bits_allocated, byteorder)) + if bits_allocated != 1 or as_packed is False: + return frame + + return _pack_bits(frame, "<") def encode_array( - arr: "np.ndarray", ds: Optional["Dataset"] = None, **kwargs: Any + arr: "np.ndarray", ds: "Dataset | None" = None, **kwargs: Any ) -> Iterator[bytes]: """Yield RLE encoded frames from `arr`. @@ -157,8 +179,8 @@ def encode_array( def encode_pixel_data( src: bytes, - ds: Optional["Dataset"] = None, - byteorder: Optional[str] = None, + ds: "Dataset | None" = None, + byteorder: str | None = None, **kwargs: Any, ) -> bytes: """Return `src` encoded using the DICOM RLE (PackBits) algorithm. @@ -178,7 +200,9 @@ def encode_pixel_data( Parameters ---------- src : bytes - The data for a single image frame data to be RLE encoded. + The data for a single image frame data to be RLE encoded. For *Bits Allocated* + 1 if `src` contains bit-packed data then it will be automatically unpacked + prior to encoding. ds : pydicom.dataset.Dataset, optional The dataset corresponding to `src` with matching values for *Rows*, *Columns*, *Samples per Pixel* and *Bits Allocated*. Required if @@ -195,7 +219,7 @@ def encode_pixel_data( * ``'samples_per_pixel'``: :class:`int` the number of samples per pixel, either 1 for monochrome or 3 for RGB or similar data. * ``'bits_allocated'``: :class:`int` the number of bits needed to contain each - pixel, either 1, 8, 16, 32 or 64. + pixel, must be 1, 8, 16, 32 or 64. Returns ------- @@ -214,34 +238,37 @@ def encode_pixel_data( spp = kwargs["samples_per_pixel"] # Validate input - if spp not in [1, 3]: + if spp not in (1, 3): msg = "(0028,0002) 'Samples per Pixel'" if ds else "'samples_per_pixel'" raise ValueError(f"{msg} must be 1 or 3") - if bpp not in [1, 8, 16, 32, 64]: + if bpp not in (1, 8, 16, 32, 64): msg = "(0028,0100) 'Bits Allocated'" if ds else "'bits_allocated'" raise ValueError(f"{msg} must be 1, 8, 16, 32 or 64") - if bpp / 8 * spp > 15: + if bpp != 1 and bpp / 8 * spp > 15: raise ValueError( "Unable to encode the data as the RLE format used by the DICOM " "Standard only allows a maximum of 15 segments" ) - byteorder = "<" if bpp == 8 else byteorder + byteorder = "<" if bpp <= 8 else byteorder if byteorder not in ("<", ">"): raise ValueError( "A valid 'byteorder' is required when the number of bits per " "pixel is greater than 8" ) - if len(src) != (r * c * bpp / 8 * spp): + src_length = len(src) + if bpp == 1 and src_length == math.ceil((r * c) / 8): + # src is bit packed, so unpack + src = _unpack_bits(src, r * c, "<") + elif src_length != (r * c * bpp / 8 * spp): raise ValueError("The length of the data doesn't match the image parameters") return cast(bytes, encode_frame(src, r, c, spp, bpp, byteorder)) -# TODO: unpack Bits Stored 1 def generate_frames(ds: "Dataset", reshape: bool = True) -> Iterator[np.ndarray]: """Yield a *Pixel Data* frame from `ds` as an :class:`~numpy.ndarray`. @@ -317,7 +344,6 @@ def generate_frames(ds: "Dataset", reshape: bool = True) -> Iterator[np.ndarray] yield arr.transpose(1, 2, 0) -# TODO: unpack Bits Stored 1 def pixel_array(ds: "Dataset") -> "np.ndarray": """Return the entire *Pixel Data* as an :class:`~numpy.ndarray`. @@ -376,3 +402,57 @@ def pixel_data(arr: "np.ndarray", ds: "Dataset") -> bytes: from pydicom.encaps import encapsulate return encapsulate([ii for ii in encode_array(arr, ds)]) + + +def pack_bits(src: bytes | bytearray, bitorder: str = "<") -> bytearray: + """Bit pack `src` + + .. versionadded: 2.2 + + Parameters + ---------- + src : bytes | bytearray + The data to be bit-packed, should only contain zeros and ones. + bitorder : str, optional + The bit ordering to use for the packed data, should be '<' for little endian + ordering (default) or '>' for big-endian ordering. For example, if `src` is + ``b"\x00\x01\x00\x00"`` then ``0x02`` (``0b00000010``) will be returned for + little endian ordering and ``0x40`` (``0b01000000``) for big endian ordering. + + Returns + ------- + bytearray + The bit-packed data, one byte per 8 bytes of the original with padding bits + added if the length of `src` is not a multiple of 8. + """ + return _pack_bits(src, bitorder) + + +def unpack_bits( + src: bytes | bytearray, count: int | None = None, bitorder: str = "<" + ) -> bytearray: + """Bit unpack `src` + + .. versionadded: 2.2 + + Parameters + ---------- + src : bytes | bytearray + The data to be unpacked. + count : int | None, optional + If ``None`` (default) then return the entire unpacked `src`, otherwise return + the first `count` number of bytes from the unpacked `src`. + bitorder : str, optional + The bit ordering to used by the packed data, should be '<' for little endian + ordering (default) or '>' for big-endian ordering. + + Returns + ------- + bytearray + The unpacked data, 8 bytes per byte of `src`. + """ + maximum_nr_bits = len(src) * 8 + if count is None or not (0 < count <= maximum_nr_bits): + count = maximum_nr_bits + + return _unpack_bits(src, count, bitorder) diff --git a/src/lib.rs b/src/lib.rs index aa04348..32088f9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,10 +21,132 @@ fn rle(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(encode_segment, m)?); m.add_function(wrap_pyfunction!(encode_frame, m)?); + m.add_function(wrap_pyfunction!(pack_bits, m)?); + m.add_function(wrap_pyfunction!(unpack_bits, m)?); + Ok(()) } +// Utilities +// --------- + +#[pyfunction] +fn pack_bits<'py>( + src: Vec, bitorder: char, py: Python<'py> +) -> PyResult> { + + match bitorder { + '>' | '<' => {}, + _ => { return Err(PyValueError::new_err("'bitorder' must be '>' or '<'")) } + } + + // Check values are in (0, 1) + if src.iter().max() > Some(&1u8) { + return Err( + PyValueError::new_err("Only binary input (containing zeros or ones) can be packed") + ) + } + + let mut dst: Vec = Vec::new(); + + if bitorder == '<' { + // Bits use little endian ordering + for chunk in src.chunks_exact(8) { + let mut packed = 0u8; + for idx in 0..8 { + packed |= chunk[idx] << idx; + } + dst.push(packed); + } + } else { + // Bits use big endian ordering + for chunk in src.chunks_exact(8) { + let mut packed = 0u8; + for idx in 0..8 { + packed |= chunk[idx] << (7 - idx); + } + dst.push(packed); + } + } + + let remainder = src.len() % 8; + if remainder > 0 { + let mut last_byte = 0u8; + // ..., 0, 1, 0, 0 iterates as 0, 0, 1, 0 + if bitorder == '<' { + for (idx, bit) in src.iter().rev().take(remainder).enumerate() { + // 0 0 0 0 0 0 1 0 + last_byte |= bit << (remainder - idx - 1); + } + } else { + for (idx, bit) in src.iter().rev().take(remainder).enumerate() { + // 0 1 0 0 0 0 0 0 + last_byte |= bit << (8 - remainder - idx); + } + } + dst.push(last_byte); + } + + Ok(PyByteArray::new(py, &dst)) +} + + +#[pyfunction] +fn unpack_bits<'py>( + src: Vec, count: u128, bitorder: char, py: Python<'py> +) -> PyResult> { + match bitorder { + '>' | '<' => {}, + _ => { return Err(PyValueError::new_err("'bitorder' must be '>' or '<'")) } + } + + // The maximum value of `count` should be 2^65 + let nr_bits: u128; + let nr_bytes = u128::try_from(src.len()).unwrap(); + if count == 0 || count > nr_bytes * 8 { + nr_bits = nr_bytes * 8; + } else { + nr_bits = count; + } + + let mut dst: Vec = Vec::new(); + // Shouldn't be more than 2^64 + let nr_whole_bytes = usize::try_from(nr_bits / 8).unwrap(); + // Shouldn't be more than 7 + let nr_remainder_bits = usize::try_from(nr_bits % 8).unwrap(); + + if bitorder == '<' { + // Unpack the whole bytes + for offset in 0..nr_whole_bytes { + for idx in 0..8 { + dst.push((src[offset] >> idx) & 1u8); + } + } + // Do the final (partial) byte, if required + if nr_remainder_bits != 0 { + for idx in 0..nr_remainder_bits { + dst.push((src[nr_whole_bytes] >> idx) & 1u8); + } + } + } else { + for offset in 0..nr_whole_bytes { + for idx in 0..8 { + dst.push((src[offset] >> (7 - idx)) & 1u8); + } + } + if nr_remainder_bits != 0 { + for idx in 0..nr_remainder_bits { + dst.push((src[nr_whole_bytes] >> (7 - idx)) & 1u8); + } + } + + } + + Ok(PyByteArray::new(py, &dst)) +} + + // RLE Decoding // ------------ @@ -147,6 +269,11 @@ fn _decode_frame( let err_invalid_nr_samples = Err( String::from("The (0028,0002) 'Samples per Pixel' must be 1 or 3").into() ); + let err_invalid_nr_samples_ba1 = Err( + String::from( + "The (0028,0002) 'Samples per Pixel' must be 1 if (0028,0100) 'Bits Allocated' is 1" + ).into() + ); let err_segment_length = Err( String::from( "The decoded segment length does not match the expected length" @@ -157,12 +284,10 @@ fn _decode_frame( ); // Check 'Bits Allocated' is 1 or a multiple of 8 - let mut is_bit_packed = false; - let mut bytes_per_pixel: u8; // Valid values are 1 | 2 | 4 | 8 + let bytes_per_pixel: u8; // Valid values are 1 | 2 | 4 | 8 match bpp { 0 => return err_invalid_bits_allocated, 1 => { - is_bit_packed = true; bytes_per_pixel = 1; }, _ => match bpp % 8 { @@ -224,9 +349,9 @@ fn _decode_frame( match spp { 1 => {}, 3 => { - // Bit packed data must be 1 sample per pixel + // Bits allocated 1 must be 1 sample per pixel match bpp { - 1 => { return err_invalid_nr_samples }, + 1 => { return err_invalid_nr_samples_ba1 }, _ => {} } }, @@ -254,79 +379,50 @@ fn _decode_frame( // TODO: handle unwrap let pps = usize::try_from(nr_pixels).unwrap(); // Concatenate sample planes into a frame - if !is_bit_packed { - // Watch for overflow here; u32 * u32 -> u64 - // Actual values are (u16 * u16) * u8 - let expected_length = usize::try_from( - nr_pixels * nr_segments - ).unwrap(); - - // Pre-allocate a vector for the decoded frame - let mut frame = vec![0u8; expected_length]; - - for sample in 0..spp { // 0 or (0, 1, 2) - // Sample offset - let so = usize::from(sample * bytes_per_pixel) * pps; - - // Interleave the segments into a sample plane - for byte_offset in 0..bytes_per_pixel { // 0, [1, 2, 3, ..., 7] - // idx should be in range [0, 15] - let idx: usize; - if byteorder == '>' { // big-endian - // e.g. 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 - idx = usize::from(sample * bytes_per_pixel + byte_offset); - } else { // little-endian - // e.g. 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8 - idx = usize::from( - bytes_per_pixel - byte_offset + bytes_per_pixel * sample - ) - 1; - } - - // offsets[idx] is u32 -> usize not guaranteed - let start = usize::try_from(offsets[idx]).unwrap(); - let end = usize::try_from(offsets[idx + 1]).unwrap(); - - // Decode the segment into the frame - let len = _decode_segment_into_frame( - <&[u8]>::try_from(&src[start..end]).unwrap(), - &mut frame, - usize::from(bytes_per_pixel), - usize::from(byte_offset) + so - )?; - - if len != pps { return err_segment_length } + // Watch for overflow here; u32 * u32 -> u64 + // Actual values are (u16 * u16) * u8 + let expected_length = usize::try_from( + nr_pixels * u32::from(nr_segments) + ).unwrap(); + + // Pre-allocate a vector for the decoded frame + let mut frame = vec![0u8; expected_length]; + + for sample in 0..spp { // 0 or (0, 1, 2) + // Sample offset + let so = usize::from(sample * bytes_per_pixel) * pps; + + // Interleave the segments into a sample plane + for byte_offset in 0..bytes_per_pixel { // 0, [1, 2, 3, ..., 7] + // idx should be in range [0, 15] + let idx: usize; + if byteorder == '>' { // big-endian + // e.g. 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 + idx = usize::from(sample * bytes_per_pixel + byte_offset); + } else { // little-endian + // e.g. 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8 + idx = usize::from( + bytes_per_pixel - byte_offset + bytes_per_pixel * sample + ) - 1; } - } - return Ok(frame) - } + // offsets[idx] is u32 -> usize not guaranteed + let start = usize::try_from(offsets[idx]).unwrap(); + let end = usize::try_from(offsets[idx + 1]).unwrap(); - /* Bit-packed data - ------------------ - For bit-packed data (i.e. a *Bits Allocated* of 1), we decode the RLE encoded data but - leave the bit-packing in place. This means the length of each decoded segment is not - equal to the number of pixels but instead the number of pixels / 8, rounded up to the - nearest whole integer, with the difference being accounted for by 0b0 padding bits. - */ + // Decode the segment into the frame + let len = _decode_segment_into_frame( + <&[u8]>::try_from(&src[start..end]).unwrap(), + &mut frame, + usize::from(bytes_per_pixel), + usize::from(byte_offset) + so + )?; - // The expected number of whole bytes per segment after decoding - let mut bytes_per_segment: usize; - match nr_pixels % 8 { - 0 => { bytes_per_segment = pps / 8; }, - _ => { bytes_per_segment = (pps + (8 - pps % 8)) / 8; }, + if len != pps { return err_segment_length } + } } - // For bit-packed data we can only have 1 segment. - let start = usize::try_from(offsets[0]).unwrap(); - let end = usize::try_from(offsets[1]).unwrap(); - let segment = _decode_bit_packed_segment( - <&[u8]>::try_from(&src[start..end]).unwrap(), - bytes_per_segment, - )?; - - if segment.len() != bytes_per_segment) { return err_segment_length } - - return Ok(segment) + return Ok(frame) } @@ -581,7 +677,7 @@ fn _decode_segment(src: &[u8], dst: &mut Vec) -> Result<(), Box> #[pyfunction] fn encode_frame<'py>( - src: &[u8], rows: u16, cols: u16, spp: u8, bpp: u8, byteorder: char, py: Python<'py> + src: Vec, rows: u16, cols: u16, spp: u8, bpp: u8, byteorder: char, py: Python<'py> ) -> PyResult> { /* Return RLE encoded `src` as bytes. @@ -616,7 +712,7 @@ fn encode_frame<'py>( fn _encode_frame( - src: &[u8], dst: &mut Vec, rows: u16, cols: u16, spp: u8, bpp: u8, byteorder: char + src: Vec, dst: &mut Vec, rows: u16, cols: u16, spp: u8, bpp: u8, byteorder: char ) -> Result<(), Box> { /* @@ -645,6 +741,11 @@ fn _encode_frame( let err_invalid_nr_samples = Err( String::from("The (0028,0002) 'Samples per Pixel' must be 1 or 3").into() ); + let err_invalid_nr_samples_ba1 = Err( + String::from( + "The (0028,0002) 'Samples per Pixel' must be 1 if (0028,0100) 'Bits Allocated' is 1" + ).into() + ); let err_invalid_bits_allocated = Err( String::from( "The (0028,0100) 'Bits Allocated' value must be 1, 8, 16, 32 or 64" @@ -671,24 +772,18 @@ fn _encode_frame( // Check 'Samples per Pixel' is either 1 or 3 // Check 'Bits Allocated' is 1 or a multiple of 8 // Check 'Samples per Pixel' is 1 if 'Bits Allocated' is 1 - let mut is_bit_packed = false; - let mut bytes_per_pixel: u8; + let bytes_per_pixel: u8; match spp { 1 => { match bpp { - 1 => { - is_bit_packed = true; - bytes_per_pixel = 1; - }, - 8 | 16 | 32 | 64 => { - bytes_per_pixel = bpp / 8; - }, + 1 => { bytes_per_pixel = 1; }, + 8 | 16 | 32 | 64 => { bytes_per_pixel = bpp / 8; }, _ => { return err_invalid_bits_allocated } } } 3 => { match bpp { - 1 => { return err_invalid_nr_samples }, + 1 => { return err_invalid_nr_samples_ba1 }, 8 | 16 | 32 | 64 => { bytes_per_pixel = bpp / 8; }, _ => { return err_invalid_bits_allocated } } @@ -713,16 +808,9 @@ fn _encode_frame( let c = usize::try_from(cols).unwrap(); let total_pixels = r * c * usize::from(spp); - if !is_bit_packed { - let total_length = total_pixels * usize::from(bytes_per_pixel); - if src.len() != total_length { return err_invalid_parameters } - } else { - let mut total_length: usize; - match total_pixels % 8 { - 0 => { total_length = total_pixels / 8; }, - _ => { total_length = (total_pixels + (8 - total_pixels % 8)) / 8; } - } - if src.len() != total_length { return err_invalid_parameters } + let total_length = total_pixels * usize::from(bytes_per_pixel); + if src.len() != total_length { + return err_invalid_parameters } let nr_segments: u8 = spp * bytes_per_pixel; @@ -756,21 +844,13 @@ fn _encode_frame( } // Encode! Note the offset start of the `src` iter - if !is_bit_packed { - let segment: Vec = src[start_indices[idx]..] - .into_iter() - .step_by(usize::from(spp * bytes_per_pixel)) - .cloned() - .collect(); - - _encode_segment_from_vector(segment, dst, cols)?; - } else { - // Should only ever be 1 sample per pixel -> 1 segment - // cols is wrong here, should be / 8 - // Also the DICOM Standard says each row should be encoded separately, - // what do we do if the number of columns isn't divisble by 8? - _encode_segment_from_vector(src, dst, cols)?; - } + let segment: Vec = src[start_indices[idx]..] + .into_iter() + .step_by(usize::from(spp * bytes_per_pixel)) + .cloned() + .collect(); + + _encode_segment_from_vector(segment, dst, cols)?; } Ok(()) From f4fa1dbf9ccbb2abf3a0346df5d694ebafa4a618 Mon Sep 17 00:00:00 2001 From: scaramallion Date: Mon, 8 Sep 2025 08:47:50 +1000 Subject: [PATCH 6/9] Cleanup and fix 3.14 tests --- .github/workflows/pytest-builds.yml | 12 ++++- src/lib.rs | 70 ----------------------------- 2 files changed, 11 insertions(+), 71 deletions(-) diff --git a/.github/workflows/pytest-builds.yml b/.github/workflows/pytest-builds.yml index 4022531..53fb4dc 100644 --- a/.github/workflows/pytest-builds.yml +++ b/.github/workflows/pytest-builds.yml @@ -30,13 +30,23 @@ jobs: run: curl https://sh.rustup.rs -sSf | sh -s -- -y - - name: Install dependencies + - name: Install dependencies (!= 3.14) + if: ${{ matrix.python-version }} != '3.14' run: | pip install --upgrade pip pip install setuptools-rust pytest pydicom coverage pytest-cov pip install git+https://github.com/pydicom/pylibjpeg-data pip install -e . + - name: Install dependencies (== 3.14) + if: ${{ matrix.python-version }} == '3.14' + run: | + pip install --upgrade pip + pip install setuptools-rust pytest coverage pytest-cov + pip install git+https://github.com/pydicom/pylibjpeg-data + pip install git+https://github.com/pydicom/pydicom + pip install -e . + - name: Test with pytest env: PYTHON_VERSION: ${{ matrix.python-version }} diff --git a/src/lib.rs b/src/lib.rs index 32088f9..435a6a0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,8 +2,6 @@ use std::convert::TryFrom; use std::error::Error; -// use bitvec::prelude::*; - use pyo3::prelude::*; use pyo3::wrap_pyfunction; use pyo3::types::{PyBytes, PyByteArray}; @@ -426,74 +424,6 @@ fn _decode_frame( } -fn _decode_bit_packed_segment( - src: &[u8], segment_length: usize -) -> Result, Box> { - /* - - The returned data is guaranteed to be no more than segment_length in size. - */ - let eod = src.len() - 1; - let mut pos = 0; - let mut header_byte: usize; - let mut dst: Vec = Vec::new(); - let mut op_len: usize; - let mut idx: usize = 0; // number of bytes decoded - - let err_eod = Err( - String::from( - "The end of the data was reached before the segment was completely decoded" - ).into() - ); - - loop { - // `header_byte` is equivalent to N in the DICOM Standard - // usize is at least u8 - header_byte = usize::from(src[pos]); - pos += 1; - if header_byte > 128 { - // Extend by copying the next byte (-N + 1) times - // however since using uint8 instead of int8 this will be - // (256 - N + 1) times - op_len = 257 - header_byte; - - // Check we have enough encoded data - if pos > eod { return err_eod } - - // Check segment for excess padding - if (idx + op_len) > segment_length { - dst.extend(vec![src[pos]; segment_length - idx]); - - return Ok(dst) - } - - dst.extend(vec![src[pos]; op_len]); - idx += op_len; - pos += 1; - } else if header_byte < 128 { - // Extend by literally copying the next (N + 1) bytes - op_len = header_byte + 1; - - // Check we have enough encoded data - if (pos + header_byte) > eod { return err_eod } - - // Check segment for excess padding - if (idx + op_len) > segment_length { - dst.extend(&src[pos..pos + segment_length - idx]); - - return Ok(dst) - } - - dst.extend(&src[pos..pos + op_len]); - pos += header_byte + 1; - idx += op_len; - } // header_byte == 128 is noop - - if pos >= eod { return Ok(dst) } - } -} - - fn _decode_segment_into_frame( src: &[u8], dst: &mut Vec, bpp: usize, initial_offset: usize ) -> Result> { From 027a75464b120f3335538025b1ebcfefe22a3697 Mon Sep 17 00:00:00 2001 From: scaramallion Date: Mon, 8 Sep 2025 08:50:42 +1000 Subject: [PATCH 7/9] Fix tests check --- .github/workflows/pytest-builds.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pytest-builds.yml b/.github/workflows/pytest-builds.yml index 53fb4dc..90d1e22 100644 --- a/.github/workflows/pytest-builds.yml +++ b/.github/workflows/pytest-builds.yml @@ -31,7 +31,7 @@ jobs: curl https://sh.rustup.rs -sSf | sh -s -- -y - name: Install dependencies (!= 3.14) - if: ${{ matrix.python-version }} != '3.14' + if: ${{ matrix.python-version != '3.14' }} run: | pip install --upgrade pip pip install setuptools-rust pytest pydicom coverage pytest-cov @@ -39,7 +39,7 @@ jobs: pip install -e . - name: Install dependencies (== 3.14) - if: ${{ matrix.python-version }} == '3.14' + if: ${{ matrix.python-version == '3.14' }} run: | pip install --upgrade pip pip install setuptools-rust pytest coverage pytest-cov From 0df89e219ac828886ec2b9be18b11b09013a6c4b Mon Sep 17 00:00:00 2001 From: scaramallion Date: Mon, 8 Sep 2025 08:58:27 +1000 Subject: [PATCH 8/9] Add tests for missing coverage --- rle/tests/test_utils.py | 107 ++++++++++++++++++++++++++++------------ rle/utils.py | 2 +- 2 files changed, 76 insertions(+), 33 deletions(-) diff --git a/rle/tests/test_utils.py b/rle/tests/test_utils.py index 3fe248e..371d865 100644 --- a/rle/tests/test_utils.py +++ b/rle/tests/test_utils.py @@ -25,8 +25,10 @@ encode_array, pixel_data, pixel_array, + pack_bits, + unpack_bits, ) -from rle.rle import pack_bits, unpack_bits +from rle.rle import pack_bits as _pack_bits, unpack_bits as _unpack_bits INDEX_LEE = get_indexed_datasets("1.2.840.10008.1.2.1") @@ -383,10 +385,20 @@ def test_pixel_data(self): class TestUnpackBits: - """Tests for unpack_bits().""" + """Tests for _unpack_bits().""" @pytest.mark.parametrize("src, little, big", REFERENCE_PACK_UNPACK) def test_unpack_bytes(self, src, little, big): + """Test unpacking data without numpy.""" + as_bytes = pack(f"{len(little)}B", *little) + assert _unpack_bits(src, 0, "<") == as_bytes + assert _unpack_bits(src, 32, "<") == as_bytes + as_bytes = pack(f"{len(big)}B", *big) + assert _unpack_bits(src, 0, ">") == as_bytes + assert _unpack_bits(src, 32, ">") == as_bytes + + @pytest.mark.parametrize("src, little, big", REFERENCE_PACK_UNPACK) + def test_unpack_bytes_util(self, src, little, big): """Test unpacking data without numpy.""" as_bytes = pack(f"{len(little)}B", *little) assert unpack_bits(src, 0, "<") == as_bytes @@ -397,6 +409,31 @@ def test_unpack_bytes(self, src, little, big): def test_count_little(self): """Test the `count` parameter for little endian unpacking.""" + assert _unpack_bits(b"\x00", 1, "<") == b"\x00" + assert _unpack_bits(b"\xff", 1, "<") == b"\x01" + assert _unpack_bits(b"\xff", 2, "<") == b"\x01" * 2 + assert _unpack_bits(b"\xff", 3, "<") == b"\x01" * 3 + assert _unpack_bits(b"\xff", 4, "<") == b"\x01" * 4 + assert _unpack_bits(b"\xff", 5, "<") == b"\x01" * 5 + assert _unpack_bits(b"\xff", 6, "<") == b"\x01" * 6 + assert _unpack_bits(b"\xff", 7, "<") == b"\x01" * 7 + assert _unpack_bits(b"\xff", 8, "<") == b"\x01" * 8 + assert _unpack_bits(b"\xff\xAA", 9, "<") == b"\x01" * 8 + b"\x00" + assert _unpack_bits(b"\xff\xAA", 10, "<") == b"\x01" * 8 + b"\x00\x01" + assert _unpack_bits(b"\xff\xAA", 11, "<") == b"\x01" * 8 + b"\x00\x01\x00" + assert _unpack_bits(b"\xff\xAA", 12, "<") == b"\x01" * 8 + b"\x00\x01" * 2 + assert _unpack_bits(b"\xff\xAA", 13, "<") == b"\x01" * 8 + b"\x00\x01" * 2 + b"\x00" + assert _unpack_bits(b"\xff\xAA", 14, "<") == b"\x01" * 8 + b"\x00\x01" * 3 + assert _unpack_bits(b"\xff\xAA", 15, "<") == b"\x01" * 8 + b"\x00\x01" * 3 + b"\x00" + assert _unpack_bits(b"\xff\xAA", 16, "<") == b"\x01" * 8 + b"\x00\x01" * 4 + + def test_count_little_util(self): + """Test the `count` parameter for little endian unpacking.""" + assert unpack_bits(b"\x00", -1, "<") == b"\x00" * 8 + assert unpack_bits(b"\x00", 10, "<") == b"\x00" * 8 + assert unpack_bits(b"\x00", None, "<") == b"\x00" * 8 + assert unpack_bits(b"\x00", 0, "<") == b"\x00" * 8 + assert unpack_bits(b"\x00", 1, "<") == b"\x00" assert unpack_bits(b"\xff", 1, "<") == b"\x01" assert unpack_bits(b"\xff", 2, "<") == b"\x01" * 2 @@ -417,31 +454,31 @@ def test_count_little(self): def test_count_big(self): """Test the `count` parameter for big endian unpacking.""" - assert unpack_bits(b"\x00", 1, ">") == b"\x00" - assert unpack_bits(b"\xff", 1, ">") == b"\x01" - assert unpack_bits(b"\xff", 2, ">") == b"\x01" * 2 - assert unpack_bits(b"\xff", 3, ">") == b"\x01" * 3 - assert unpack_bits(b"\xff", 4, ">") == b"\x01" * 4 - assert unpack_bits(b"\xff", 5, ">") == b"\x01" * 5 - assert unpack_bits(b"\xff", 6, ">") == b"\x01" * 6 - assert unpack_bits(b"\xff", 7, ">") == b"\x01" * 7 - assert unpack_bits(b"\xff", 8, ">") == b"\x01" * 8 - assert unpack_bits(b"\xff\xAA", 9, ">") == b"\x01" * 8 + b"\x01" - assert unpack_bits(b"\xff\xAA", 10, ">") == b"\x01" * 8 + b"\x01\x00" - assert unpack_bits(b"\xff\xAA", 11, ">") == b"\x01" * 8 + b"\x01\x00\x01" - assert unpack_bits(b"\xff\xAA", 12, ">") == b"\x01" * 8 + b"\x01\x00" * 2 - assert unpack_bits(b"\xff\xAA", 13, ">") == b"\x01" * 8 + b"\x01\x00" * 2 + b"\x01" - assert unpack_bits(b"\xff\xAA", 14, ">") == b"\x01" * 8 + b"\x01\x00" * 3 - assert unpack_bits(b"\xff\xAA", 15, ">") == b"\x01" * 8 + b"\x01\x00" * 3 + b"\x01" - assert unpack_bits(b"\xff\xAA", 16, ">") == b"\x01" * 8 + b"\x01\x00" * 4 + assert _unpack_bits(b"\x00", 1, ">") == b"\x00" + assert _unpack_bits(b"\xff", 1, ">") == b"\x01" + assert _unpack_bits(b"\xff", 2, ">") == b"\x01" * 2 + assert _unpack_bits(b"\xff", 3, ">") == b"\x01" * 3 + assert _unpack_bits(b"\xff", 4, ">") == b"\x01" * 4 + assert _unpack_bits(b"\xff", 5, ">") == b"\x01" * 5 + assert _unpack_bits(b"\xff", 6, ">") == b"\x01" * 6 + assert _unpack_bits(b"\xff", 7, ">") == b"\x01" * 7 + assert _unpack_bits(b"\xff", 8, ">") == b"\x01" * 8 + assert _unpack_bits(b"\xff\xAA", 9, ">") == b"\x01" * 8 + b"\x01" + assert _unpack_bits(b"\xff\xAA", 10, ">") == b"\x01" * 8 + b"\x01\x00" + assert _unpack_bits(b"\xff\xAA", 11, ">") == b"\x01" * 8 + b"\x01\x00\x01" + assert _unpack_bits(b"\xff\xAA", 12, ">") == b"\x01" * 8 + b"\x01\x00" * 2 + assert _unpack_bits(b"\xff\xAA", 13, ">") == b"\x01" * 8 + b"\x01\x00" * 2 + b"\x01" + assert _unpack_bits(b"\xff\xAA", 14, ">") == b"\x01" * 8 + b"\x01\x00" * 3 + assert _unpack_bits(b"\xff\xAA", 15, ">") == b"\x01" * 8 + b"\x01\x00" * 3 + b"\x01" + assert _unpack_bits(b"\xff\xAA", 16, ">") == b"\x01" * 8 + b"\x01\x00" * 4 @pytest.mark.parametrize("src, little, big", REFERENCE_PACK_UNPACK) def test_unpack_bytearray(self, src, little, big): """Test unpacking data without numpy.""" as_bytes = pack(f"{len(little)}B", *little) - assert unpack_bits(bytearray(src), 0, "<") == as_bytes + assert _unpack_bits(bytearray(src), 0, "<") == as_bytes as_bytes = pack(f"{len(big)}B", *big) - assert unpack_bits(bytearray(src), 0, ">") == as_bytes + assert _unpack_bits(bytearray(src), 0, ">") == as_bytes REFERENCE_PACK_PARTIAL_LITTLE = [ @@ -489,6 +526,12 @@ class TestPackBits: @pytest.mark.parametrize("output, little, big", REFERENCE_PACK_UNPACK) def test_pack_bytes(self, output, little, big): + """Test packing data.""" + assert output == _pack_bits(bytes(little), "<") + assert output == _pack_bits(bytes(big), ">") + + @pytest.mark.parametrize("output, little, big", REFERENCE_PACK_UNPACK) + def test_pack_bytes_utils(self, output, little, big): """Test packing data.""" assert output == pack_bits(bytes(little), "<") assert output == pack_bits(bytes(big), ">") @@ -496,14 +539,14 @@ def test_pack_bytes(self, output, little, big): @pytest.mark.parametrize("output, little, big", REFERENCE_PACK_UNPACK) def test_pack_bytearray(self, output, little, big): """Test packing data.""" - assert output == pack_bits(bytearray(little), "<") - assert output == pack_bits(bytearray(big), ">") + assert output == _pack_bits(bytearray(little), "<") + assert output == _pack_bits(bytearray(big), ">") def test_non_binary_input(self): """Test non-binary input raises exception.""" msg = r"Only binary input \(containing zeros or ones\) can be packed" with pytest.raises(ValueError, match=msg): - pack_bits(b"\x00\x00\x02\x00\x00\x00\x00\x00", "<") + _pack_bits(b"\x00\x00\x02\x00\x00\x00\x00\x00", "<") def test_bytes_input(self): """Repeat above test with bytes input.""" @@ -516,8 +559,8 @@ def test_bytes_input(self): ] ) # fmt: on - assert b"\x00\x55\xff" == pack_bits(src, "<") - assert b"\x00\xAA\xff" == pack_bits(src, ">") + assert b"\x00\x55\xff" == _pack_bits(src, "<") + assert b"\x00\xAA\xff" == _pack_bits(src, ">") def test_bytearry_input(self): """Repeat above test with bytearray input.""" @@ -530,25 +573,25 @@ def test_bytearry_input(self): ] ) # fmt: on - assert b"\x00\x55\xff" == pack_bits(src, "<") - assert b"\x00\xAA\xff" == pack_bits(src, ">") + assert b"\x00\x55\xff" == _pack_bits(src, "<") + assert b"\x00\xAA\xff" == _pack_bits(src, ">") @pytest.mark.parametrize("output, src", REFERENCE_PACK_PARTIAL_LITTLE) def test_pack_partial_bytes(self, src, output): """Test packing data that isn't a full byte long.""" - assert output == pack_bits(bytes(src), "<") + assert output == _pack_bits(bytes(src), "<") @pytest.mark.parametrize("output, src", REFERENCE_PACK_PARTIAL_LITTLE) def test_pack_partial_bytearray(self, src, output): """Test packing data that isn't a full byte long.""" - assert output == pack_bits(bytearray(src), "<") + assert output == _pack_bits(bytearray(src), "<") @pytest.mark.parametrize("output, src", REFERENCE_PACK_PARTIAL_BIG) def test_pack_partial_bytes_big(self, src, output): """Test packing data that isn't a full byte long.""" - assert output == pack_bits(bytes(src), ">") + assert output == _pack_bits(bytes(src), ">") @pytest.mark.parametrize("output, src", REFERENCE_PACK_PARTIAL_BIG) def test_pack_partial_bytearray_big(self, src, output): """Test packing data that isn't a full byte long.""" - assert output == pack_bits(bytearray(src), ">") + assert output == _pack_bits(bytearray(src), ">") diff --git a/rle/utils.py b/rle/utils.py index d15c277..439a828 100644 --- a/rle/utils.py +++ b/rle/utils.py @@ -429,7 +429,7 @@ def pack_bits(src: bytes | bytearray, bitorder: str = "<") -> bytearray: def unpack_bits( - src: bytes | bytearray, count: int | None = None, bitorder: str = "<" + src: bytes | bytearray, count: int | None = None, bitorder: str = "<" ) -> bytearray: """Bit unpack `src` From 4beba4480e515d66aeb4cd34ca6e89409523f8ae Mon Sep 17 00:00:00 2001 From: scaramallion Date: Mon, 8 Sep 2025 09:06:34 +1000 Subject: [PATCH 9/9] Bump version to release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ab085eb..00a049c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ keywords = ["dicom pydicom python rle pylibjpeg rust"] license = "MIT" name = "pylibjpeg-rle" readme = "README.md" -version = "2.2.0.dev0" +version = "2.2.0" requires-python = ">=3.10" dependencies = [ "numpy>=2.0",