@@ -920,46 +920,79 @@ impl Frame {
920920
921921 /// Fills an rgba buffer by skipping the alpha values
922922 pub ( crate ) fn fill_rgba ( & self , buf : & mut [ u8 ] ) {
923+ const BPP : usize = 4 ;
924+
923925 let mut index = 0_usize ;
924926
925927 for ( y, row) in buf
926- . chunks_exact_mut ( usize:: from ( self . width ) * 4 )
928+ . chunks_exact_mut ( usize:: from ( self . width ) * BPP )
927929 . enumerate ( )
928930 {
929- let chroma_index_row = usize:: from ( self . chroma_width ( ) ) * ( y / 2 ) ;
930-
931- for ( x, rgb_chunk) in row. chunks_exact_mut ( 4 ) . enumerate ( ) {
932- let chroma_index = chroma_index_row + x / 2 ;
931+ let chroma_index = usize:: from ( self . chroma_width ( ) ) * ( y / 2 ) ;
933932
934- Frame :: fill_single (
935- self . ybuf [ index] ,
936- self . ubuf [ chroma_index] ,
937- self . vbuf [ chroma_index] ,
938- rgb_chunk,
939- ) ;
933+ let next_index = index + usize:: from ( self . width ) ;
934+ Frame :: fill_rgba_row (
935+ & self . ybuf [ index..next_index] ,
936+ & self . ubuf [ chroma_index..] ,
937+ & self . vbuf [ chroma_index..] ,
938+ row,
939+ ) ;
940940
941- index += 1 ;
942- }
941+ index = next_index;
943942 }
944943 }
945944
946- fn fill_single ( y : u8 , u : u8 , v : u8 , rgb : & mut [ u8 ] ) {
947- // // Conversion values from https://docs.microsoft.com/en-us/windows/win32/medfound/recommended-8-bit-yuv-formats-for-video-rendering#converting-8-bit-yuv-to-rgb888
948- // let c: i32 = i32::from(y) - 16;
949- // let d: i32 = i32::from(u) - 128;
950- // let e: i32 = i32::from(v) - 128;
951- // let r: u8 = clamp((298 * c + 409 * e + 128) >> 8, 0, 255)
952- // .try_into()
953- // .unwrap();
954- // let g: u8 = clamp((298 * c - 100 * d - 208 * e + 128) >> 8, 0, 255)
955- // .try_into()
956- // .unwrap();
957- // let b: u8 = clamp((298 * c + 516 * d + 128) >> 8, 0, 255)
958- // .try_into()
959- // .unwrap();
960- rgb[ 0 ] = clip ( mulhi ( y, 19077 ) + mulhi ( v, 26149 ) - 14234 ) ;
961- rgb[ 1 ] = clip ( mulhi ( y, 19077 ) - mulhi ( u, 6419 ) - mulhi ( v, 13320 ) + 8708 ) ;
962- rgb[ 2 ] = clip ( mulhi ( y, 19077 ) + mulhi ( u, 33050 ) - 17685 ) ;
945+ fn fill_rgba_row ( y_vec : & [ u8 ] , u_vec : & [ u8 ] , v_vec : & [ u8 ] , rgba : & mut [ u8 ] ) {
946+ // Fill 2 pixels per iteration: these pixels share `u` and `v` components
947+ let mut rgb_chunks = rgba. chunks_exact_mut ( 8 ) ;
948+ let mut y_chunks = y_vec. chunks_exact ( 2 ) ;
949+ let mut u_iter = u_vec. iter ( ) ;
950+ let mut v_iter = v_vec. iter ( ) ;
951+
952+ for ( ( ( rgb, y) , & u) , & v) in ( & mut rgb_chunks)
953+ . zip ( & mut y_chunks)
954+ . zip ( & mut u_iter)
955+ . zip ( & mut v_iter)
956+ {
957+ let coeffs = [
958+ mulhi ( v, 26149 ) ,
959+ mulhi ( u, 6419 ) ,
960+ mulhi ( v, 13320 ) ,
961+ mulhi ( u, 33050 ) ,
962+ ] ;
963+
964+ let to_copy = [
965+ clip ( mulhi ( y[ 0 ] , 19077 ) + coeffs[ 0 ] - 14234 ) ,
966+ clip ( mulhi ( y[ 0 ] , 19077 ) - coeffs[ 1 ] - coeffs[ 2 ] + 8708 ) ,
967+ clip ( mulhi ( y[ 0 ] , 19077 ) + coeffs[ 3 ] - 17685 ) ,
968+ rgb[ 3 ] ,
969+ clip ( mulhi ( y[ 1 ] , 19077 ) + coeffs[ 0 ] - 14234 ) ,
970+ clip ( mulhi ( y[ 1 ] , 19077 ) - coeffs[ 1 ] - coeffs[ 2 ] + 8708 ) ,
971+ clip ( mulhi ( y[ 1 ] , 19077 ) + coeffs[ 3 ] - 17685 ) ,
972+ rgb[ 7 ] ,
973+ ] ;
974+ rgb. copy_from_slice ( & to_copy) ;
975+ }
976+
977+ let remainder = rgb_chunks. into_remainder ( ) ;
978+ if remainder. len ( ) >= 4 {
979+ if let ( Some ( & y) , Some ( & u) , Some ( & v) ) = (
980+ y_chunks. remainder ( ) . iter ( ) . next ( ) ,
981+ u_iter. next ( ) ,
982+ v_iter. next ( ) ,
983+ ) {
984+ let coeffs = [
985+ mulhi ( v, 26149 ) ,
986+ mulhi ( u, 6419 ) ,
987+ mulhi ( v, 13320 ) ,
988+ mulhi ( u, 33050 ) ,
989+ ] ;
990+
991+ remainder[ 0 ] = clip ( mulhi ( y, 19077 ) + coeffs[ 0 ] - 14234 ) ;
992+ remainder[ 1 ] = clip ( mulhi ( y, 19077 ) - coeffs[ 1 ] - coeffs[ 2 ] + 8708 ) ;
993+ remainder[ 2 ] = clip ( mulhi ( y, 19077 ) + coeffs[ 3 ] - 17685 ) ;
994+ }
995+ }
963996 }
964997
965998 /// Gets the buffer size
0 commit comments