The decoder now treats non-interleaved streams as blocks

quilan1 · quilan1 · commit 6d9fd3ef34bc · 2021-01-14T14:06:08.000-05:00
Section 4.8.2 of the standard ("Minimum Coded Unit") states: "[...] If the compressed image data is non-interleaved, the MCU is defined to be one data unit [...] If the compressed data is interleaved, the MCU contains one or more data units from each component." Previously, data was read from the stream as if every MCU always had the batched (interleaved) group size. This typically doesn't cause a problem, but some progressive JPEGs perform additional refinement passes on individual components. In cases where (for example) a component has a 2x2 sampling factor, this meant that blocks would be processed incorrectly. The offsets were correct but, as seen in #173, an EOB run covering 240 blocks would be exhausted after only a quarter of the (incorrectly) expected MCUs were complete. The decoder would then continue reading from the stream because it thought the EOB run was over. In this case, a restart (RST) marker was incorrectly consumed and things went off the rails shortly after. Now a distinction is made between MCU sizes in interleaved and non-interleaved streams. Most of the time, in non-interleaved streams this manifests as MCUs being treated as single blocks instead (with the indices involved adjusted accordingly). One complication is that the worker thread pool currently accepts 'MCU rows' from the scan only in the batched (interleaved) size. So, while the stream treats MCUs as blocks, the worker threads are still expecting full-sized units. This means some care must be taken to send the data over to the worker threads at the correct time. The final change was to move the restart-marker check to the top of the processing loop; this simplified some of the logic and ensured that interleaved and non-interleaved streams are both treated correctly. This also (I believe) addresses the concerns of #89.
diff --git a/src/decoder.rs b/src/decoder.rs
@@ -484,9 +484,6 @@ impl<R: Read> Decoder<R> {
             }
         }
 
-        let blocks_per_mcu: Vec<u16> = components.iter()
-                                                 .map(|c| c.horizontal_sampling_factor as u16 * c.vertical_sampling_factor as u16)
-                                                 .collect();
         let is_progressive = frame.coding_process == CodingProcess::DctProgressive;
         let is_interleaved = components.len() > 1;
         let mut dummy_block = [0i16; 64];
@@ -504,70 +501,37 @@ impl<R: Read> Decoder<R> {
             }
         }
 
-        for mcu_y in 0 .. frame.mcu_size.height {
-            for mcu_x in 0 .. frame.mcu_size.width {
-                for (i, component) in components.iter().enumerate() {
-                    for j in 0 .. blocks_per_mcu[i] {
-                        let (block_x, block_y) = if is_interleaved {
-                            // Section A.2.3
-                            (mcu_x * component.horizontal_sampling_factor as u16 + j % component.horizontal_sampling_factor as u16,
-                             mcu_y * component.vertical_sampling_factor as u16 + j / component.horizontal_sampling_factor as u16)
-                        }
-                        else {
-                            // Section A.2.2
-
-                            let blocks_per_row = component.block_size.width as usize;
-                            let block_num = (mcu_y as usize * frame.mcu_size.width as usize +
-                                mcu_x as usize) * blocks_per_mcu[i] as usize + j as usize;
-
-                            let x = (block_num % blocks_per_row) as u16;
-                            let y = (block_num / blocks_per_row) as u16;
-
-                            if x * component.dct_scale as u16 >= component.size.width || y * component.dct_scale as u16 >= component.size.height {
-                                continue;
-                            }
+        // 4.8.2
+        // When reading from the stream, if the data is non-interleaved then an MCU consists of
+        // exactly one block (effectively a 1x1 sample).
+        let (mcu_horizontal_samples, mcu_vertical_samples) = if is_interleaved {
+            let horizontal = components.iter().map(|component| component.horizontal_sampling_factor as u16).collect::<Vec<_>>();
+            let vertical = components.iter().map(|component| component.vertical_sampling_factor as u16).collect::<Vec<_>>();
+            (horizontal, vertical)
+        } else {
+            (vec![1], vec![1])
+        };
 
-                            (x, y)
-                        };
+        // This also affects how many MCU values we read from stream. If it's a non-interleaved stream,
+        // the MCUs will be exactly the block count.
+        let (max_mcu_x, max_mcu_y) = if is_interleaved {
+            (frame.mcu_size.width, frame.mcu_size.height)
+        } else {
+            (components[0].block_size.width, components[0].block_size.height)
+        };
 
-                        let block_offset = (block_y as usize * component.block_size.width as usize + block_x as usize) * 64;
-                        let mcu_row_offset = mcu_y as usize * component.block_size.width as usize * component.vertical_sampling_factor as usize * 64;
-                        let coefficients = if is_progressive {
-                            &mut self.coefficients[scan.component_indices[i]][block_offset .. block_offset + 64]
-                        } else if finished[i] {
-                            &mut mcu_row_coefficients[i][block_offset - mcu_row_offset .. block_offset - mcu_row_offset + 64]
-                        } else {
-                            &mut dummy_block[..]
-                        };
+        for mcu_y in 0..max_mcu_y {
+            if mcu_y * 8 >= frame.image_size.height {
+                break;
+            }
 
-                        if scan.successive_approximation_high == 0 {
-                            decode_block(&mut self.reader,
-                                         coefficients,
-                                         &mut huffman,
-                                         self.dc_huffman_tables[scan.dc_table_indices[i]].as_ref(),
-                                         self.ac_huffman_tables[scan.ac_table_indices[i]].as_ref(),
-                                         scan.spectral_selection.clone(),
-                                         scan.successive_approximation_low,
-                                         &mut eob_run,
-                                         &mut dc_predictors[i])?;
-                        }
-                        else {
-                            decode_block_successive_approximation(&mut self.reader,
-                                                                  coefficients,
-                                                                  &mut huffman,
-                                                                  self.ac_huffman_tables[scan.ac_table_indices[i]].as_ref(),
-                                                                  scan.spectral_selection.clone(),
-                                                                  scan.successive_approximation_low,
-                                                                  &mut eob_run)?;
-                        }
-                    }
+            for mcu_x in 0..max_mcu_x {
+                if mcu_x * 8 >= frame.image_size.width {
+                    break;
                 }
 
                 if self.restart_interval > 0 {
-                    let is_last_mcu = mcu_x == frame.mcu_size.width - 1 && mcu_y == frame.mcu_size.height - 1;
-                    mcus_left_until_restart -= 1;
-
-                    if mcus_left_until_restart == 0 && !is_last_mcu {
+                    if mcus_left_until_restart == 0 {
                         match huffman.take_marker(&mut self.reader)? {
                             Some(Marker::RST(n)) => {
                                 if n != expected_rst_num {
@@ -587,16 +551,86 @@ impl<R: Read> Decoder<R> {
                             None => return Err(Error::Format(format!("no marker found where RST{} was expected", expected_rst_num))),
                         }
                     }
+
+                    mcus_left_until_restart -= 1;
+                }
+
+                for (i, component) in components.iter().enumerate() {
+                    for v_pos in 0..mcu_vertical_samples[i] {
+                        for h_pos in 0..mcu_horizontal_samples[i] {
+                            let coefficients = if is_progressive {
+                                let block_y = (mcu_y * mcu_vertical_samples[i] + v_pos) as usize;
+                                let block_x = (mcu_x * mcu_horizontal_samples[i] + h_pos) as usize;
+                                let block_offset = (block_y * component.block_size.width as usize + block_x) * 64;
+                                &mut self.coefficients[scan.component_indices[i]][block_offset..block_offset + 64]
+                            } else if finished[i] {
+                                // Because the worker thread operates in batches as if we were always interleaved, we
+                                // need to distinguish between a single-shot buffer and one that's currently in process
+                                // (for a non-interleaved) stream
+                                let mcu_batch_current_row = if is_interleaved {
+                                    0
+                                } else {
+                                    mcu_y % component.vertical_sampling_factor as u16
+                                };
+
+                                let block_y = (mcu_batch_current_row * mcu_vertical_samples[i] + v_pos) as usize;
+                                let block_x = (mcu_x * mcu_horizontal_samples[i] + h_pos) as usize;
+                                let block_offset = (block_y * component.block_size.width as usize + block_x) * 64;
+                                &mut mcu_row_coefficients[i][block_offset..block_offset + 64]
+                            } else {
+                                &mut dummy_block[..]
+                            };
+
+                            if scan.successive_approximation_high == 0 {
+                                decode_block(&mut self.reader,
+                                            coefficients,
+                                            &mut huffman,
+                                            self.dc_huffman_tables[scan.dc_table_indices[i]].as_ref(),
+                                            self.ac_huffman_tables[scan.ac_table_indices[i]].as_ref(),
+                                            scan.spectral_selection.clone(),
+                                            scan.successive_approximation_low,
+                                            &mut eob_run,
+                                            &mut dc_predictors[i])?;
+                            }
+                            else {
+                                decode_block_successive_approximation(&mut self.reader,
+                                                                    coefficients,
+                                                                    &mut huffman,
+                                                                    self.ac_huffman_tables[scan.ac_table_indices[i]].as_ref(),
+                                                                    scan.spectral_selection.clone(),
+                                                                    scan.successive_approximation_low,
+                                                                    &mut eob_run)?;
+                            }
+                        }
+                    }
                 }
             }
 
             // Send the coefficients from this MCU row to the worker thread for dequantization and idct.
             for (i, component) in components.iter().enumerate() {
                 if finished[i] {
+                    // In the event of non-interleaved streams, if we're still building the buffer out,
+                    // keep going; don't send it yet. We also need to ensure we don't skip over the last
+                    // row(s) of the image.
+                    if !is_interleaved && (mcu_y + 1) * 8 < frame.image_size.height {
+                        if (mcu_y + 1) % component.vertical_sampling_factor as u16 > 0 {
+                            continue;
+                        }
+                    }
+
                     let coefficients_per_mcu_row = component.block_size.width as usize * component.vertical_sampling_factor as usize * 64;
 
                     let row_coefficients = if is_progressive {
-                        let offset = mcu_y as usize * coefficients_per_mcu_row;
+                        // Because non-interleaved streams will have multiple MCU rows concatenated together,
+                        // the row for calculating the offset is different.
+                        let worker_mcu_y = if is_interleaved {
+                            mcu_y
+                        } else {
+                            // Explicitly doing floor-division here
+                            mcu_y / component.vertical_sampling_factor as u16
+                        };
+
+                        let offset = worker_mcu_y as usize * coefficients_per_mcu_row;
                         self.coefficients[scan.component_indices[i]][offset .. offset + coefficients_per_mcu_row].to_vec()
                     } else {
                         mem::replace(&mut mcu_row_coefficients[i], vec![0i16; coefficients_per_mcu_row])