Skip to content

Commit ea19e69

Browse files
committed
Bring back video_codec::vc_linecopy
Useful to speed up `libvideo_swblitter3__blit__mask1msb_blend1_samefmt`
1 parent 2f987c7 commit ea19e69

File tree

6 files changed

+390
-148
lines changed

6 files changed

+390
-148
lines changed

kos/cpp.hint

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2863,6 +2863,7 @@
28632863
#define DEFINE_GENERIC_vertfill__with__setpixel(f,...)static void f(byte_t*__restrict line,video_coord_t x,size_t stride,video_pixel_t pixel,video_dim_t num_pixels){}
28642864
#define DEFINE_GENERIC_rectfill__with__linefill(f,...)static void f(byte_t*__restrict line,video_coord_t x,size_t stride,video_pixel_t pixel,video_dim_t size_x,video_dim_t size_y){}
28652865
#define DEFINE_GENERIC_rectcopy__with__getpixel__and__setpixel(f,...)static void f(byte_t*__restrict dst_line,video_coord_t dst_x,size_t dst_stride,byte_t const*__restrict src_line,video_coord_t src_x,size_t src_stride,video_dim_t size_x,video_dim_t size_y){}
2866+
#define DEFINE_GENERIC_linecopy__with__getpixel__and__setpixel(f,...)static void f(byte_t*__restrict dst_line,video_coord_t dst_x,byte_t const*__restrict src_line,video_coord_t src_x,video_dim_t size_x){}
28662867
#define DEFINE_GENERIC_rectmove__with__getpixel__and__setpixel(f,...)static void f(byte_t*__restrict dst_line,video_coord_t dst_x,byte_t const*__restrict src_line,video_coord_t src_x,size_t stride,video_dim_t size_x,video_dim_t size_y){}
28672868
#define DEFINE_GENERIC_setpixel3__with__setpixel(f,...)static void f(byte_t*__restrict line,video_coord_t x,video_pixel_t pixel){}
28682869
#define DEFINE_PIXEL64_IO_WRAPPERS(...)

kos/include/libvideo/codec/codecs.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,15 @@ typedef __ATTR_NONNULL_T((1, 3)) void
377377
__byte_t const *__src_line, video_coord_t __src_x,
378378
__size_t __stride, video_dim_t __size_x, video_dim_t __size_y);
379379

380+
/* Copy a line of `__size_x` pixels from `__src_line` (starting at pixel-offset `__src_x`) to `__dst_line` (starting at pixel-offset `__dst_x`). When src/dst overlap, results are weak-undefined.
381+
* @assume(IS_ALIGNED(__dst_line, vc_align));
382+
* @assume(IS_ALIGNED(__src_line, vc_align));
383+
* @assume(__size_x > 0); */
384+
typedef __ATTR_NONNULL_T((1, 3)) void
385+
(LIBVIDEO_CODEC_CC *video_codec_linecopy_t)(__byte_t *__restrict __dst_line, video_coord_t __dst_x,
386+
__byte_t const *__restrict __src_line, video_coord_t __src_x,
387+
video_dim_t __size_x);
388+
380389
/* 64-bit color/pixel functions */
381390
#ifdef CONFIG_VIDEO_CODEC_HAVE_PIXEL64
382391
typedef __ATTR_PURE_T __ATTR_WUNUSED_T __ATTR_NONNULL_T((1)) video_color64_t
@@ -431,6 +440,7 @@ struct video_codec {
431440
video_codec_rectfill_t vc_rectfill; /* Fill a rect of pixels. */
432441
video_codec_rectcopy_t vc_rectcopy; /* Copy a rect of pixels. When src/dst overlap, results are weak-undefined. */
433442
video_codec_rectmove_t vc_rectmove; /* Same as `vc_rectcopy', but properly deal with overlapping video buffers */
443+
video_codec_linecopy_t vc_linecopy; /* Copy a line of pixels */
434444

435445
/* TODO: More operators for fast rectcopy w/ rotation/mirroring */
436446

kos/src/libvideo/codec/codec-utils.h

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ DECL_BEGIN
192192
#if defined(__KERNEL__) || !defined(__pic__)
193193
#define _DEFINE_CODEC_AL1(name, codec, specs, rambuffer_requirements, \
194194
getpixel, setpixel, rectcopy, rectmove, \
195-
linefill, vertfill, rectfill, \
195+
linecopy, linefill, vertfill, rectfill, \
196196
pixel2color, color2pixel, initconverter) \
197197
PRIVATE struct video_codec const name = { \
198198
/* .vc_codec = */ codec, \
@@ -211,6 +211,7 @@ DECL_BEGIN
211211
/* .vc_rectfill = */ &rectfill, \
212212
/* .vc_rectcopy = */ &rectcopy, \
213213
/* .vc_rectmove = */ &rectmove, \
214+
/* .vc_linecopy = */ &linecopy, \
214215
SET_vc_pixel2color64_STATIC_INITIALIZER(&pixel2color##64) \
215216
SET_vc_color2pixel64_STATIC_INITIALIZER(&color2pixel##64) \
216217
SET_vc_getpixel64_STATIC_INITIALIZER(&getpixel##_64) \
@@ -222,11 +223,11 @@ DECL_BEGIN
222223
#define _DEFINE_CODEC_ALX(name, codec, specs, \
223224
align, rambuffer_requirements, \
224225
getpixel, setpixel, rectcopy, rectmove, \
225-
linefill, vertfill, rectfill, \
226+
linecopy, linefill, vertfill, rectfill, \
226227
unaligned_getpixel, unaligned_setpixel, \
227228
unaligned_rectcopy, unaligned_rectmove, \
228-
unaligned_linefill, unaligned_vertfill, \
229-
unaligned_rectfill, \
229+
unaligned_linecopy, unaligned_linefill, \
230+
unaligned_vertfill, unaligned_rectfill, \
230231
pixel2color, color2pixel, initconverter) \
231232
PRIVATE struct video_codec const unaligned_##name = { \
232233
/* .vc_codec = */ codec, \
@@ -245,6 +246,7 @@ DECL_BEGIN
245246
/* .vc_rectfill = */ &unaligned_rectfill, \
246247
/* .vc_rectcopy = */ &unaligned_rectcopy, \
247248
/* .vc_rectmove = */ &unaligned_rectmove, \
249+
/* .vc_linecopy = */ &unaligned_linecopy, \
248250
SET_vc_pixel2color64_STATIC_INITIALIZER(&pixel2color##64) \
249251
SET_vc_color2pixel64_STATIC_INITIALIZER(&color2pixel##64) \
250252
SET_vc_getpixel64_STATIC_INITIALIZER(&unaligned_getpixel##_64) \
@@ -270,6 +272,7 @@ DECL_BEGIN
270272
/* .vc_rectfill = */ &rectfill, \
271273
/* .vc_rectcopy = */ &rectcopy, \
272274
/* .vc_rectmove = */ &rectmove, \
275+
/* .vc_linecopy = */ &linecopy, \
273276
SET_vc_pixel2color64_STATIC_INITIALIZER(&pixel2color##64) \
274277
SET_vc_color2pixel64_STATIC_INITIALIZER(&color2pixel##64) \
275278
SET_vc_getpixel64_STATIC_INITIALIZER(&getpixel##_64) \
@@ -281,7 +284,7 @@ DECL_BEGIN
281284
#else /* __KERNEL__ || !__pic__ */
282285
#define _DEFINE_CODEC_AL1(name, codec, specs, rambuffer_requirements, \
283286
getpixel, setpixel, rectcopy, rectmove, \
284-
linefill, vertfill, rectfill, \
287+
linecopy, linefill, vertfill, rectfill, \
285288
pixel2color, color2pixel, initconverter) \
286289
PRIVATE struct video_codec name = { \
287290
/* .vc_codec = */ codec, \
@@ -301,6 +304,7 @@ DECL_BEGIN
301304
name.vc_rectfill = &rectfill; \
302305
name.vc_rectcopy = &rectcopy; \
303306
name.vc_rectmove = &rectmove; \
307+
name.vc_linecopy = &linecopy; \
304308
SET_vc_pixel2color64_INITIALIZER(name, &pixel2color##64) \
305309
SET_vc_color2pixel64_INITIALIZER(name, &color2pixel##64) \
306310
SET_vc_getpixel64_INITIALIZER(name, &getpixel##_64) \
@@ -315,11 +319,11 @@ DECL_BEGIN
315319
#define _DEFINE_CODEC_ALX(name, codec, specs, \
316320
align, rambuffer_requirements, \
317321
getpixel, setpixel, rectcopy, rectmove, \
318-
linefill, vertfill, rectfill, \
322+
linecopy, linefill, vertfill, rectfill, \
319323
unaligned_getpixel, unaligned_setpixel, \
320324
unaligned_rectcopy, unaligned_rectmove, \
321-
unaligned_linefill, unaligned_vertfill, \
322-
unaligned_rectfill, \
325+
unaligned_linecopy, unaligned_linefill, \
326+
unaligned_vertfill, unaligned_rectfill, \
323327
pixel2color, color2pixel, initconverter) \
324328
PRIVATE struct video_codec unaligned_##name = { \
325329
/* .vc_codec = */ codec, \
@@ -345,6 +349,7 @@ DECL_BEGIN
345349
unaligned_##name.vc_rectfill = &unaligned_rectfill; \
346350
unaligned_##name.vc_rectcopy = &unaligned_rectcopy; \
347351
unaligned_##name.vc_rectmove = &unaligned_rectmove; \
352+
unaligned_##name.vc_linecopy = &unaligned_linecopy; \
348353
SET_vc_pixel2color64_INITIALIZER(unaligned_##name, &pixel2color##64) \
349354
SET_vc_color2pixel64_INITIALIZER(unaligned_##name, &color2pixel##64) \
350355
SET_vc_getpixel64_INITIALIZER(unaligned_##name, &unaligned_getpixel##_64) \
@@ -364,6 +369,7 @@ DECL_BEGIN
364369
name.vc_rectfill = &rectfill; \
365370
name.vc_rectcopy = &rectcopy; \
366371
name.vc_rectmove = &rectmove; \
372+
name.vc_linecopy = &linecopy; \
367373
SET_vc_pixel2color64_INITIALIZER(name, &pixel2color##64) \
368374
SET_vc_color2pixel64_INITIALIZER(name, &color2pixel##64) \
369375
SET_vc_getpixel64_INITIALIZER(name, &getpixel##_64) \
@@ -440,6 +446,19 @@ DECL_BEGIN
440446
line += stride; \
441447
} while (--size_y); \
442448
}
449+
/* Define a generic line-copy function named `linecopy` that transfers
 * `size_x` pixels one at a time: each pixel is read with
 * `getpixel(src_line, src_x)` and written with `setpixel(dst_line, dst_x, pixel)`,
 * after which both x-offsets are advanced by 1.
 * The body is a do/while loop, so it always copies at least one pixel;
 * callers must therefore ensure `size_x > 0`. */
#define DEFINE_GENERIC_linecopy__with__getpixel__and__setpixel(linecopy, getpixel, setpixel) \
450+
PRIVATE NONNULL((1, 3)) void CC \
451+
linecopy(byte_t *__restrict dst_line, video_coord_t dst_x, \
452+
byte_t const *__restrict src_line, video_coord_t src_x, \
453+
video_dim_t size_x) { \
454+
do { \
455+
video_pixel_t pixel; \
456+
pixel = getpixel(src_line, src_x); \
457+
setpixel(dst_line, dst_x, pixel); \
458+
++dst_x; \
459+
++src_x; \
460+
} while (--size_x); \
461+
}
443462
#define DEFINE_GENERIC_rectcopy__with__getpixel__and__setpixel(rectcopy, getpixel, setpixel) \
444463
PRIVATE NONNULL((1, 4)) void CC \
445464
rectcopy(byte_t *__restrict dst_line, video_coord_t dst_x, size_t dst_stride, \

kos/src/libvideo/codec/codecs-extra.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,13 +76,15 @@ DEFINE_GENERIC_vertfill__with__setpixel(x_vbe16_vertfill, x_vbe16_setpixel)
7676
DEFINE_GENERIC_rectfill__with__linefill(x_vbe16_rectfill, x_vbe16_linefill)
7777
DEFINE_GENERIC_rectcopy__with__getpixel__and__setpixel(x_vbe16_rectcopy, x_vbe16_getpixel, x_vbe16_setpixel)
7878
DEFINE_GENERIC_rectmove__with__getpixel__and__setpixel(x_vbe16_rectmove, x_vbe16_getpixel, x_vbe16_setpixel)
79+
DEFINE_GENERIC_linecopy__with__getpixel__and__setpixel(x_vbe16_linecopy, x_vbe16_getpixel, x_vbe16_setpixel)
7980
DEFINE_GENERIC_setpixel3__with__setpixel(rp3_x_vbe16_setpixel, x_vbe16_setpixel)
8081
DEFINE_PIXEL64_IO_WRAPPERS__WITH_PREFIX(PRIVATE, x_vbe16_)
8182

8283
#define unaligned_x_vbe16_getpixel x_vbe16_getpixel
8384
#define unaligned_x_vbe16_setpixel x_vbe16_setpixel
8485
#define unaligned_x_vbe16_rectcopy x_vbe16_rectcopy
8586
#define unaligned_x_vbe16_rectmove x_vbe16_rectmove
87+
#define unaligned_x_vbe16_linecopy x_vbe16_linecopy
8688
#define unaligned_x_vbe16_linefill x_vbe16_linefill
8789
#define unaligned_x_vbe16_vertfill x_vbe16_vertfill
8890
#define unaligned_x_vbe16_rectfill x_vbe16_rectfill
@@ -107,7 +109,8 @@ libvideo_codec_lookup_extra(video_codec_t codec) {
107109
/* vcs_bmask */ 0, /* ... */
108110
/* vcs_amask */ 0), /* ... */
109111
x_vbe16_requirements,
110-
x_vbe16_getpixel, x_vbe16_setpixel, x_vbe16_rectcopy, x_vbe16_rectmove,
112+
x_vbe16_getpixel, x_vbe16_setpixel,
113+
x_vbe16_rectcopy, x_vbe16_rectmove, x_vbe16_linecopy,
111114
x_vbe16_linefill, x_vbe16_vertfill, x_vbe16_rectfill,
112115
pal_pixel2color, pal_color2pixel, initconv_from_p);
113116
result = &x_vbe16;

kos/src/libvideo/codec/codecs-pixel-lt8.c.inl

Lines changed: 130 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -557,8 +557,7 @@ LOCAL_FUNC(rectcopy_bitwise)(byte_t *__restrict dst_line, shift_t dst_head_skip,
557557
byte_t const *__restrict src_line, shift_t src_head_skip, size_t src_stride,
558558
video_dim_t size_x, video_dim_t size_y) {
559559
do {
560-
video_dim_t x;
561-
x = 0;
560+
video_dim_t x = 0;
562561
do {
563562
video_pixel_t pixel = (LOCAL_FUNC(getpixel)(src_line, src_head_skip + x));
564563
(LOCAL_FUNC(setpixel)(dst_line, dst_head_skip + x, pixel));
@@ -741,6 +740,135 @@ LOCAL_FUNC(rectmove)(byte_t *__restrict dst_line, video_coord_t dst_x,
741740
}
742741

743742

743+
/************************************************************************/
744+
/* LINE COPY */
745+
/************************************************************************/
746+
/* Copy `size_x` pixels from `src_line` to `dst_line` for the case where the
 * first pixel sits at the same sub-byte pixel offset `head_skip` in both the
 * source byte and the destination byte.
 *
 * - If all pixels fit into the first byte, only the masked bits of that byte
 *   are merged from source into destination.
 * - Otherwise a partial head byte (when `head_skip != 0`) is merged using a
 *   mask, whole bytes are copied with `memcpyb` / `mempcpyb`, and a partial
 *   tail byte (when `tail_fill != 0`) is merged using a mask.
 *
 * NOTE(review): `_PXMASK` / `_PXMASK_I` are defined outside of this view;
 *               presumably they produce the bit-mask covering the first N
 *               pixels of a byte, and the inverse of that mask -- confirm.
 * NOTE(review): the `!full_words` fast paths of the "head without tail" and
 *               the "tail without head" cases look unreachable, because such
 *               sizes already satisfy the "Head only" check at the start of
 *               the function -- confirm before relying on them. */
LOCAL NONNULL((1, 2)) void CC
747+
LOCAL_FUNC(linecopy_same_bitoff)(byte_t *__restrict dst_line,
748+
byte_t const *__restrict src_line,
749+
shift_t head_skip, video_dim_t size_x) {
750+
byte_t value;
751+
byte_t head_mask;
752+
byte_t head_mask_i;
753+
byte_t tail_mask;
754+
byte_t tail_mask_i;
755+
shift_t head_fill; /* # of pixels to fill in the first byte */
756+
shift_t tail_fill; /* # of pixels to fill in the last byte */
757+
video_dim_t full_words;
758+
video_dim_t after_head; /* # of pixels remaining after the head byte */
759+
760+
if (size_x <= (video_dim_t)(LOCAL_PIXELS_PER_BYTE - head_skip)) {
761+
/* Head only */
762+
head_mask = _PXMASK(size_x) >> (head_skip << LOCAL_BPP_LOG2);
763+
head_mask_i = ~head_mask;
764+
value = *dst_line;
765+
value &= head_mask_i;
766+
value |= *src_line & head_mask;
767+
*dst_line = value;
768+
return;
769+
}
770+
771+
/* Fill-mode for the head-byte */
772+
head_fill = (LOCAL_PIXELS_PER_BYTE - head_skip) & LOCAL_PIXELS_PER_BYTE_MASK;
773+
774+
/* Figure out how many "whole" words there are in a line */
775+
after_head = size_x - head_fill;
776+
full_words = after_head >> LOCAL_PIXELS_PER_BYTE_LOG2;
777+
778+
/* Figure out how many pixels must be written to a tail-byte */
779+
tail_fill = after_head & LOCAL_PIXELS_PER_BYTE_MASK;
780+
tail_mask = _PXMASK(tail_fill);
781+
tail_mask_i = ~tail_mask;
782+
783+
/* Select copy strategy based on presence of head/tail */
784+
if (head_skip) {
785+
head_mask = _PXMASK_I(head_skip);
786+
head_mask_i = ~head_mask;
787+
if (tail_fill) {
788+
#ifndef __OPTIMIZE_SIZE__
789+
if (!full_words) {
790+
dst_line[0] = (dst_line[0] & head_mask_i) | (src_line[0] & head_mask);
791+
dst_line[1] = (dst_line[1] & tail_mask_i) | (src_line[1] & tail_mask);
792+
} else
793+
#endif /* !__OPTIMIZE_SIZE__ */
794+
{
795+
*dst_line = (*dst_line & head_mask_i) | (*src_line & head_mask);
796+
++dst_line;
797+
++src_line;
798+
dst_line = mempcpyb(dst_line, src_line, full_words);
799+
src_line += full_words;
800+
*dst_line = (*dst_line & tail_mask_i) | (*src_line & tail_mask);
801+
}
802+
} else {
803+
#ifndef __OPTIMIZE_SIZE__
804+
if (!full_words) {
805+
*dst_line = (*dst_line & head_mask_i) | (*src_line & head_mask);
806+
} else
807+
#endif /* !__OPTIMIZE_SIZE__ */
808+
{
809+
*dst_line = (*dst_line & head_mask_i) | (*src_line & head_mask);
810+
++dst_line;
811+
++src_line;
812+
memcpyb(dst_line, src_line, full_words);
813+
}
814+
}
815+
} else if (tail_fill) {
816+
#ifndef __OPTIMIZE_SIZE__
817+
if (!full_words) {
818+
*dst_line = (*dst_line & tail_mask_i) | (*src_line & tail_mask);
819+
} else
820+
#endif /* !__OPTIMIZE_SIZE__ */
821+
{
822+
dst_line = mempcpyb(dst_line, src_line, full_words);
823+
src_line += full_words;
824+
*dst_line = (*dst_line & tail_mask_i) | (*src_line & tail_mask);
825+
}
826+
} else {
827+
codec_assert(full_words > 0);
828+
dst_line = mempcpyb(dst_line, src_line, full_words);
829+
}
830+
}
831+
832+
/* Slow-path line copy: transfer `size_x` pixels one at a time, reading pixel
 * `src_head_skip + x` from `src_line` via `getpixel` and writing it to pixel
 * `dst_head_skip + x` of `dst_line` via `setpixel`.
 * The do/while loop always copies at least one pixel, so `size_x` must be
 * greater than 0. */
LOCAL NONNULL((1, 3)) void CC
833+
LOCAL_FUNC(linecopy_bitwise)(byte_t *__restrict dst_line, shift_t dst_head_skip,
834+
byte_t const *__restrict src_line, shift_t src_head_skip,
835+
video_dim_t size_x) {
836+
video_dim_t x = 0;
837+
do {
838+
video_pixel_t pixel = (LOCAL_FUNC(getpixel)(src_line, src_head_skip + x));
839+
(LOCAL_FUNC(setpixel)(dst_line, dst_head_skip + x, pixel));
840+
} while (++x < size_x);
841+
}
842+
843+
/* Copy `size_x` pixels from pixel-offset `src_x` of `src_line` to pixel-offset
 * `dst_x` of `dst_line` (asserts `size_x > 0`).
 *
 * Both line pointers are first advanced to the byte holding the first pixel,
 * and the remaining sub-byte pixel offsets are computed. When source and
 * destination share the same sub-byte offset, the copy is delegated to
 * `linecopy_same_bitoff`; otherwise it falls back to the pixel-by-pixel
 * `linecopy_bitwise`. */
PRIVATE NONNULL((1, 3)) void CC
844+
LOCAL_FUNC(linecopy)(byte_t *__restrict dst_line, video_coord_t dst_x,
845+
byte_t const *__restrict src_line, video_coord_t src_x,
846+
video_dim_t size_x) {
847+
shift_t dst_head_skip; /* # of pixels to skip in the first byte */
848+
shift_t src_head_skip; /* # of pixels to skip in the first byte */
849+
codec_assert(size_x > 0);
850+
851+
dst_line += dst_x >> LOCAL_PIXELS_PER_BYTE_LOG2;
852+
dst_head_skip = dst_x & LOCAL_PIXELS_PER_BYTE_MASK;
853+
src_line += src_x >> LOCAL_PIXELS_PER_BYTE_LOG2;
854+
src_head_skip = src_x & LOCAL_PIXELS_PER_BYTE_MASK;
855+
856+
/* Simple case: source and destination have the same bit-alignment */
857+
if (dst_head_skip == src_head_skip) {
858+
(LOCAL_FUNC(linecopy_same_bitoff)(dst_line, src_line,
859+
dst_head_skip, size_x));
860+
return;
861+
}
862+
863+
/* TODO: More optimizations */
864+
865+
(LOCAL_FUNC(linecopy_bitwise)(dst_line, dst_head_skip,
866+
src_line, src_head_skip,
867+
size_x));
868+
}
869+
870+
871+
744872
#undef _PXMASK
745873
#undef _PXMASK_I
746874
#undef _LSB_PXMASK

0 commit comments

Comments
 (0)