@@ -282,21 +282,21 @@ static constexpr void matrixRGB(Pixel* pixel, const int Y, const uint8_t U, cons
282282/* * \brief Decode a DYUV line to ARGB.
283283 * \tparam WIDTH The number of source pixels to decode.
284284 * \param dst Where the ARGB data will be written to.
285- * \param data The source DYUV data.
285+ * \param dyuv The source DYUV data.
286286 * \param initialDYUV The initial value to be used by the DYUV decoder.
287- * \return The number of raw bytes read from \p data .
287+ * \return The number of raw bytes read from \p dyuv .
288288 */
289289template <uint16_t WIDTH>
290- uint16_t decodeDYUVLine (Pixel* dst, const uint8_t * data , uint32_t initialDYUV) noexcept
290+ uint16_t decodeDYUVLine (Pixel* dst, const uint8_t * dyuv , uint32_t initialDYUV) noexcept
291291{
292292 uint8_t py = bits<16 , 23 >(initialDYUV);
293293 uint8_t pu = bits<8 , 15 >(initialDYUV);
294294 uint8_t pv = initialDYUV;
295295
296296 for (uint16_t index = 0 ; index < WIDTH; index += 2 )
297297 {
298- const uint8_t high = data [index];
299- const uint8_t low = data [index + 1 ];
298+ const uint8_t high = dyuv [index];
299+ const uint8_t low = dyuv [index + 1 ];
300300
301301 // Green book V.4.4.2
302302 uint8_t u2 = bits<4 , 7 >(high);
@@ -338,6 +338,91 @@ template uint16_t decodeDYUVLine<384>(Pixel* dst, const uint8_t* data, uint32_t
338338template uint16_t decodeDYUVLine<720 >(Pixel* dst, const uint8_t * data, uint32_t initialDYUV) noexcept ;
339339template uint16_t decodeDYUVLine<768 >(Pixel* dst, const uint8_t * data, uint32_t initialDYUV) noexcept ;
340340
/** \brief Computes the index of a (Y, U, V) triplet inside the DYUV pixel LUT.
 * \param Y The luma component, shifted to bits 16 and above.
 * \param U The first chroma component, shifted to bits 8 and above.
 * \param V The second chroma component, placed in the low bits.
 *
 * Each component is expected to fit in 8 bits so the three fields do not overlap.
 * Every argument is wrapped in a cast so that passing an arbitrary expression
 * (e.g. a ternary) cannot change how the macro expands.
 */
#define DYUV_PIXEL_INDEX(Y, U, V) ((as<uint32_t>(Y) << 16) | (as<uint32_t>(U) << 8) | as<uint32_t>(V))
342+
343+ static std::vector<Pixel> generateDYUVPixelLUT () noexcept
344+ {
345+ std::vector<Pixel> array{};
346+ array.resize (256 * 256 * 256 );
347+
348+ for (int y = 0 ; y < 256 ; ++y)
349+ for (int u = 0 ; u < 256 ; ++u)
350+ for (int v = 0 ; v < 256 ; ++v)
351+ {
352+ Pixel pixel{0 };
353+ pixel.r = limu8 (y + matrixVToR[v]);
354+ pixel.g = limu8 (y - (matrixUToG[u] + matrixVToG[v]));
355+ pixel.b = limu8 (y + matrixUToB[u]);
356+ array[DYUV_PIXEL_INDEX (y, u, v)] = pixel;
357+ }
358+ return array;
359+ }
360+
361+ /* * \brief LUT to convert the YUV values to RGB. Use #DYUV_PIXEL_INDEX macro to index this array. */
362+ static const std::vector<Pixel> dyuvPixelLUT = generateDYUVPixelLUT();
363+
364+ /* * \brief Decode a DYUV line to ARGB using a LUT.
365+ * \tparam WIDTH The number of source pixels to decode.
366+ * \param dst Where the ARGB data will be written to.
367+ * \param dyuv The source DYUV data.
368+ * \param initialDYUV The initial value to be used by the DYUV decoder.
369+ * \return The number of raw bytes read from \p dyuv.
370+ *
371+ * This is not a SIMD decoder because each pixel depends on the decoded value of the previous one.
372+ * However this is another approach that uses a pixel LUT to remove as much calculations as possible.
373+ */
374+ template <uint16_t WIDTH>
375+ uint16_t decodeDYUVLineLUT (Pixel* dst, const uint8_t * dyuv, uint32_t initialDYUV) noexcept
376+ {
377+ uint8_t py = bits<16 , 23 >(initialDYUV);
378+ uint8_t pu = bits<8 , 15 >(initialDYUV);
379+ uint8_t pv = initialDYUV;
380+
381+ for (uint16_t index = 0 ; index < WIDTH; index += 2 )
382+ {
383+ const uint8_t high = dyuv[index];
384+ const uint8_t low = dyuv[index + 1 ];
385+
386+ // Green book V.4.4.2
387+ uint8_t u2 = bits<4 , 7 >(high);
388+ uint8_t y1 = bits<0 , 3 >(high);
389+ uint8_t v2 = bits<4 , 7 >(low);
390+ uint8_t y2 = bits<0 , 3 >(low);
391+
392+ y1 = py + dequantizer[y1];
393+ u2 = pu + dequantizer[u2];
394+ v2 = pv + dequantizer[v2];
395+ y2 = y1 + dequantizer[y2];
396+ const uint8_t u1 = (as<uint16_t >(pu) + as<uint16_t >(u2)) >> 1 ;
397+ const uint8_t v1 = (as<uint16_t >(pv) + as<uint16_t >(v2)) >> 1 ;
398+
399+ // Store previous.
400+ py = y2;
401+ pu = u2;
402+ pv = v2;
403+
404+ Pixel* pixel1 = dst++;
405+ *pixel1 = dyuvPixelLUT[DYUV_PIXEL_INDEX (y1, u1, v1)]; // Matrix RGB.
406+ if constexpr (WIDTH == 360 || WIDTH == 384 )
407+ {
408+ memcpy (dst++, pixel1, sizeof (Pixel));
409+ }
410+
411+ Pixel* pixel2 = dst++;
412+ *pixel2 = dyuvPixelLUT[DYUV_PIXEL_INDEX (y2, u2, v2)]; // Matrix RGB.
413+ if constexpr (WIDTH == 360 || WIDTH == 384 )
414+ {
415+ memcpy (dst++, pixel2, sizeof (Pixel));
416+ }
417+ }
418+
419+ return WIDTH;
420+ }
421+ template uint16_t decodeDYUVLineLUT<360 >(Pixel* dst, const uint8_t * dyuv, uint32_t initialDYUV) noexcept ;
422+ template uint16_t decodeDYUVLineLUT<384 >(Pixel* dst, const uint8_t * dyuv, uint32_t initialDYUV) noexcept ;
423+ template uint16_t decodeDYUVLineLUT<720 >(Pixel* dst, const uint8_t * dyuv, uint32_t initialDYUV) noexcept ;
424+ template uint16_t decodeDYUVLineLUT<768 >(Pixel* dst, const uint8_t * dyuv, uint32_t initialDYUV) noexcept ;
425+
341426/* * \brief Decode a CLUT line to ARGB.
342427 * \tparam WIDTH The number of source pixels to decode.
343428 * \param dst Where the ARGB data will be written to.
0 commit comments