Skip to content

Commit 833e6e7

Browse files
authored
Merge pull request #122 from Shnatsel/faster-rgba-yuv
2 parents ecead22 + 2c42166 commit 833e6e7

File tree

2 files changed

+66
-34
lines changed

2 files changed

+66
-34
lines changed

src/huffman.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
1+
//! Rudimentary utility for reading Canonical Huffman Codes.
2+
//! Based off https://github.com/webmproject/libwebp/blob/7f8472a610b61ec780ef0a8873cd954ac512a505/src/utils/huffman.c
3+
14
use std::io::BufRead;
25

36
use crate::decoder::DecodingError;
47

58
use super::lossless::BitReader;
69

7-
/// Rudimentary utility for reading Canonical Huffman Codes.
8-
/// Based off https://github.com/webmproject/libwebp/blob/7f8472a610b61ec780ef0a8873cd954ac512a505/src/utils/huffman.c
9-
///
10-
1110
// NOTE(review): presumably the longest Huffman code length (in bits) this
// reader accepts; the uses are outside this view — confirm.
const MAX_ALLOWED_CODE_LENGTH: usize = 15;
1211
// NOTE(review): presumably the bit width of a lookup table used when decoding;
// the uses are outside this view — confirm.
const MAX_TABLE_BITS: u8 = 10;
1312

src/vp8.rs

Lines changed: 63 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -920,46 +920,79 @@ impl Frame {
920920

921921
/// Fills the colour channels of an RGBA buffer from this frame's `ybuf`,
/// `ubuf` and `vbuf` planes, leaving every alpha byte as it was.
///
/// `buf` is consumed in rows of `width * 4` bytes; trailing bytes that do not
/// make up a whole row are not touched.
///
/// # Panics
///
/// Panics if `self.width` is zero (`chunks_exact_mut` rejects a chunk size of
/// 0) or if a plane is too short for the rows being read.
922922
pub(crate) fn fill_rgba(&self, buf: &mut [u8]) {
923+
// Bytes per output pixel (three colour bytes plus alpha).
const BPP: usize = 4;
924+
923925
let mut index = 0_usize;
924926

925927
for (y, row) in buf
926-
.chunks_exact_mut(usize::from(self.width) * 4)
928+
.chunks_exact_mut(usize::from(self.width) * BPP)
927929
.enumerate()
928930
{
929-
let chroma_index_row = usize::from(self.chroma_width()) * (y / 2);
930-
931-
for (x, rgb_chunk) in row.chunks_exact_mut(4).enumerate() {
932-
let chroma_index = chroma_index_row + x / 2;
931+
// `y / 2`: two consecutive output rows read the same chroma row.
let chroma_index = usize::from(self.chroma_width()) * (y / 2);
933932

934-
Frame::fill_single(
935-
self.ybuf[index],
936-
self.ubuf[chroma_index],
937-
self.vbuf[chroma_index],
938-
rgb_chunk,
939-
);
933+
// The luma plane is walked one `width`-long row at a time.
let next_index = index + usize::from(self.width);
934+
// The chroma slices are open-ended; `fill_rgba_row` only reads as many
// samples as the output row needs.
Frame::fill_rgba_row(
935+
&self.ybuf[index..next_index],
936+
&self.ubuf[chroma_index..],
937+
&self.vbuf[chroma_index..],
938+
row,
939+
);
940940

941-
index += 1;
942-
}
941+
// Advance to the start of the next luma row.
index = next_index;
943942
}
944943
}
945944

946-
fn fill_single(y: u8, u: u8, v: u8, rgb: &mut [u8]) {
947-
// // Conversion values from https://docs.microsoft.com/en-us/windows/win32/medfound/recommended-8-bit-yuv-formats-for-video-rendering#converting-8-bit-yuv-to-rgb888
948-
// let c: i32 = i32::from(y) - 16;
949-
// let d: i32 = i32::from(u) - 128;
950-
// let e: i32 = i32::from(v) - 128;
951-
// let r: u8 = clamp((298 * c + 409 * e + 128) >> 8, 0, 255)
952-
// .try_into()
953-
// .unwrap();
954-
// let g: u8 = clamp((298 * c - 100 * d - 208 * e + 128) >> 8, 0, 255)
955-
// .try_into()
956-
// .unwrap();
957-
// let b: u8 = clamp((298 * c + 516 * d + 128) >> 8, 0, 255)
958-
// .try_into()
959-
// .unwrap();
960-
rgb[0] = clip(mulhi(y, 19077) + mulhi(v, 26149) - 14234);
961-
rgb[1] = clip(mulhi(y, 19077) - mulhi(u, 6419) - mulhi(v, 13320) + 8708);
962-
rgb[2] = clip(mulhi(y, 19077) + mulhi(u, 33050) - 17685);
945+
/// Converts one row of Y/U/V samples into the colour bytes of `rgba`.
///
/// * `y_vec` - one luma sample per output pixel.
/// * `u_vec`, `v_vec` - one chroma sample per pair of horizontally adjacent
///   pixels; entries beyond what the row needs are ignored.
/// * `rgba` - output row, 4 bytes per pixel. Only the first three bytes of
///   each pixel are overwritten; the fourth (alpha) keeps its value.
///
/// A row with an odd number of pixels has its last pixel converted on its own
/// using the next unread chroma sample.
///
/// NOTE(review): `mulhi` and `clip` are defined outside this view; presumably
/// a fixed-point multiply and a clamp to `u8` — confirm. The numeric constants
/// appear to be fixed-point YUV-to-RGB coefficients (cf. libwebp's `yuv.h`) —
/// confirm.
fn fill_rgba_row(y_vec: &[u8], u_vec: &[u8], v_vec: &[u8], rgba: &mut [u8]) {
946+
// Fill 2 pixels per iteration: these pixels share `u` and `v` components
947+
let mut rgb_chunks = rgba.chunks_exact_mut(8);
948+
let mut y_chunks = y_vec.chunks_exact(2);
949+
let mut u_iter = u_vec.iter();
950+
let mut v_iter = v_vec.iter();
951+
952+
// The iterators are zipped through `&mut` borrows so they remain usable
// after the loop: the leftover pixel and the next chroma sample are
// pulled from them below.
for (((rgb, y), &u), &v) in (&mut rgb_chunks)
953+
.zip(&mut y_chunks)
954+
.zip(&mut u_iter)
955+
.zip(&mut v_iter)
956+
{
957+
// Chroma-only terms, computed once and reused for both pixels:
// [v term of R, u term of G, v term of G, u term of B].
let coeffs = [
958+
mulhi(v, 26149),
959+
mulhi(u, 6419),
960+
mulhi(v, 13320),
961+
mulhi(u, 33050),
962+
];
963+
964+
// Build both pixels in a fixed-size array and store them with a single
// `copy_from_slice`.
let to_copy = [
965+
clip(mulhi(y[0], 19077) + coeffs[0] - 14234),
966+
clip(mulhi(y[0], 19077) - coeffs[1] - coeffs[2] + 8708),
967+
clip(mulhi(y[0], 19077) + coeffs[3] - 17685),
968+
// Existing alpha of the first pixel is copied back unchanged.
rgb[3],
969+
clip(mulhi(y[1], 19077) + coeffs[0] - 14234),
970+
clip(mulhi(y[1], 19077) - coeffs[1] - coeffs[2] + 8708),
971+
clip(mulhi(y[1], 19077) + coeffs[3] - 17685),
972+
// Existing alpha of the second pixel is copied back unchanged.
rgb[7],
973+
];
974+
rgb.copy_from_slice(&to_copy);
975+
}
976+
977+
// Fewer than 8 bytes are left here; at least 4 means one whole pixel is
// still to be filled (odd row length).
let remainder = rgb_chunks.into_remainder();
978+
if remainder.len() >= 4 {
979+
// Needs the unpaired luma sample plus the next unread chroma samples;
// if any of them is missing the pixel is left as it was.
if let (Some(&y), Some(&u), Some(&v)) = (
980+
y_chunks.remainder().iter().next(),
981+
u_iter.next(),
982+
v_iter.next(),
983+
) {
984+
let coeffs = [
985+
mulhi(v, 26149),
986+
mulhi(u, 6419),
987+
mulhi(v, 13320),
988+
mulhi(u, 33050),
989+
];
990+
991+
// Only the three colour bytes are written; `remainder[3]` (alpha) is
// not touched.
remainder[0] = clip(mulhi(y, 19077) + coeffs[0] - 14234);
992+
remainder[1] = clip(mulhi(y, 19077) - coeffs[1] - coeffs[2] + 8708);
993+
remainder[2] = clip(mulhi(y, 19077) + coeffs[3] - 17685);
994+
}
995+
}
963996
}
964997

965998
/// Gets the buffer size

0 commit comments

Comments
 (0)