Skip to content

Commit a701543

Browse files
committed
Implement lock-optimized swblitter3__blit__mask1msb_blend1_*fmt
1 parent ea19e69 commit a701543

File tree

2 files changed

+246
-22
lines changed

2 files changed

+246
-22
lines changed

kos/src/apps/showpic/main.c

Lines changed: 30 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -345,34 +345,45 @@ do_showpic(struct screen_buffer *screen,
345345
video_gfx_getcliph(&image_gfx));
346346
#elif 1
347347
{
348-
REF struct video_buffer *mask_buffer;
349-
mask_buffer = video_buffer_create(VIDEO_BUFFER_AUTO, 64, 64,
350-
video_codec_lookup(VIDEO_CODEC_A1_MSB),
351-
NULL);
348+
static REF struct video_buffer *mask_buffer = NULL;
349+
if (!mask_buffer) {
350+
mask_buffer = video_buffer_create(VIDEO_BUFFER_AUTO, 64, 64,
351+
video_codec_lookup(VIDEO_CODEC_A1_MSB),
352+
NULL);
353+
if (mask_buffer) {
354+
struct video_gfx mask_gfx;
355+
video_buffer_getgfx(mask_buffer, &mask_gfx,
356+
GFX_BLENDMODE_OVERRIDE,
357+
VIDEO_GFX_F_XWRAP | VIDEO_GFX_F_YWRAP, 0);
358+
video_gfx_fill(&mask_gfx, 0, 0, 64 - 1, 64 - 1, VIDEO_COLOR_RGBA(0, 0, 0, 0xff));
359+
for (unsigned int i = 0; i < (32 / 4); ++i) {
360+
unsigned int x = i * 4;
361+
unsigned int y = i * 4;
362+
unsigned int w = 64 - (x * 2);
363+
unsigned int h = 64 - (y * 2);
364+
x += ((i & 1) >> 0) << 1;
365+
y += ((i & 2) >> 1) << 1;
366+
video_gfx_rect(&mask_gfx, x, y, w, h, VIDEO_COLOR_RGBA(0, 0, 0, 0));
367+
}
368+
video_gfx_setblend(&mask_gfx, GFX_BLENDMODE_ALPHAMASK);
369+
}
370+
}
352371
if (mask_buffer) {
353372
struct video_gfx mask_gfx;
354373
video_buffer_getgfx(mask_buffer, &mask_gfx,
355-
GFX_BLENDMODE_OVERRIDE,
374+
GFX_BLENDMODE_ALPHAMASK,
356375
VIDEO_GFX_F_XWRAP | VIDEO_GFX_F_YWRAP, 0);
357-
video_gfx_fill(&mask_gfx, 0, 0, 64-1, 64-1, VIDEO_COLOR_RGBA(0, 0, 0, 0xff));
358-
for (unsigned int i = 0; i < (32 / 4); ++i) {
359-
unsigned int x = i * 4;
360-
unsigned int y = i * 4;
361-
unsigned int w = 64 - (x * 2);
362-
unsigned int h = 64 - (y * 2);
363-
x += ((i & 1) >> 0) << 1;
364-
y += ((i & 2) >> 1) << 1;
365-
video_gfx_rect(&mask_gfx, x, y, w, h, VIDEO_COLOR_RGBA(0, 0, 0, 0));
366-
}
367-
video_gfx_setblend(&mask_gfx, GFX_BLENDMODE_ALPHAMASK);
368-
369-
video_gfx_stretch3(&screen_gfx, blit_x, blit_y,
376+
/*video_gfx_stretch3(&screen_gfx, blit_x, blit_y,
370377
&mask_gfx, 32, 32,
371378
blit_w, blit_h,
372379
&image_gfx, 0, 0,
373380
video_gfx_getclipw(&image_gfx),
381+
video_gfx_getcliph(&image_gfx));*/
382+
video_gfx_bitblit3(&screen_gfx, blit_x, blit_y,
383+
&mask_gfx, 29, 29,
384+
&image_gfx, 0, 0,
385+
video_gfx_getclipw(&image_gfx),
374386
video_gfx_getcliph(&image_gfx));
375-
video_buffer_decref(mask_buffer);
376387
}
377388
}
378389
#elif 0

kos/src/libvideo/gfx/swgfx/ll_blit3-mask1msb.c.inl

Lines changed: 216 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030

3131
#include <hybrid/compiler.h>
3232

33+
#include <hybrid/bit.h>
34+
3335
#include <libvideo/codec/pixel.h>
3436
#include <libvideo/codec/types.h>
3537
#include <libvideo/gfx/gfx.h>
@@ -49,6 +51,7 @@ DECL_BEGIN
4951

5052
#ifdef DEFINE_libvideo_swblitter3__blit__mask1msb
5153
#define LOCAL_libvideo_swblitter3__blit__mask1msb libvideo_swblitter3__blit__mask1msb
54+
#define LOCAL_libvideo_swblitter3__blit__mask1msb__vline libvideo_swblitter3__blit__mask1msb__vline
5255
#define LOCAL_libvideo_swblitter3__stretch__mask1msb_l libvideo_swblitter3__stretch__mask1msb_l
5356
#define LOCAL_libvideo_swblitter3__stretch__mask1msb_n libvideo_swblitter3__stretch__mask1msb_n
5457
#define LOCAL_libvideo_swblitter3__blit_imatrix__mask1msb libvideo_swblitter3__blit_imatrix__mask1msb
@@ -62,6 +65,7 @@ DECL_BEGIN
6265
#define LOCAL_LOGIDENT_swblitter3__stretch_imatrix__mask1msb_n "swblitter3__stretch_imatrix__mask1msb_n"
6366
#elif defined(DEFINE_libvideo_swblitter3__blit__mask1msb_blend1_samefmt)
6467
#define LOCAL_libvideo_swblitter3__blit__mask1msb libvideo_swblitter3__blit__mask1msb_blend1_samefmt
68+
#define LOCAL_libvideo_swblitter3__blit__mask1msb__vline libvideo_swblitter3__blit__mask1msb_blend1_samefmt__vline
6569
#define LOCAL_libvideo_swblitter3__stretch__mask1msb_l libvideo_swblitter3__stretch__mask1msb_blend1_samefmt_l
6670
#define LOCAL_libvideo_swblitter3__stretch__mask1msb_n libvideo_swblitter3__stretch__mask1msb_blend1_samefmt_n
6771
#define LOCAL_libvideo_swblitter3__blit_imatrix__mask1msb libvideo_swblitter3__blit_imatrix__mask1msb_blend1_samefmt
@@ -77,6 +81,7 @@ DECL_BEGIN
7781
#define LOCAL_IS_SAMEFMT
7882
#elif defined(DEFINE_libvideo_swblitter3__blit__mask1msb_blend1_difffmt)
7983
#define LOCAL_libvideo_swblitter3__blit__mask1msb libvideo_swblitter3__blit__mask1msb_blend1_difffmt
84+
#define LOCAL_libvideo_swblitter3__blit__mask1msb__vline libvideo_swblitter3__blit__mask1msb_blend1_difffmt__vline
8085
#define LOCAL_libvideo_swblitter3__stretch__mask1msb_l libvideo_swblitter3__stretch__mask1msb_blend1_difffmt_l
8186
#define LOCAL_libvideo_swblitter3__stretch__mask1msb_n libvideo_swblitter3__stretch__mask1msb_blend1_difffmt_n
8287
#define LOCAL_libvideo_swblitter3__blit_imatrix__mask1msb libvideo_swblitter3__blit_imatrix__mask1msb_blend1_difffmt
@@ -242,24 +247,231 @@ LOCAL_libvideo_swblitter3__stretch_imatrix__mask1msb_l__bypixel(struct video_bli
242247

243248

244249

245-
250+
#ifdef LOCAL_IS_BLEND1
/* Masked copy of a single mask-byte-wide column, repeated for `size_y' rows.
 *
 * For every row, one byte is read from `dst_line' and AND-ed with `dst_mask'.
 * Each bit that remains set selects one pixel to copy from `src_line' into
 * `out_line': a bit found `k' positions below the byte's most significant bit
 * addresses pixel `src_x + k' of the source row and pixel `out_x + k' of the
 * output row (this assumes `CLZ()' counts leading zeros within the 8-bit
 * width of its operand -- TODO confirm against <hybrid/bit.h>).
 *
 * After each row, the three line pointers are advanced by their strides.
 *
 * @param self:       Blitter; supplies the codec callbacks (and whatever
 *                    `LOCAL_load_conv' loads from it)
 * @param out_line:   First output row
 * @param out_stride: Byte distance between output rows
 * @param out_x:      Output pixel X that corresponds to the MSB of the mask byte
 * @param src_line:   First source row
 * @param src_stride: Byte distance between source rows
 * @param src_x:      Source pixel X that corresponds to the MSB of the mask byte
 * @param dst_line:   Pointer to the mask byte of the first row
 * @param dst_stride: Byte distance between mask rows
 * @param dst_mask:   Bits of the mask byte to consider (asserted to be
 *                    neither `0' nor `0xff')
 * @param size_y:     Number of rows to process; the loop always runs at
 *                    least once, so this must be non-zero */
PRIVATE ATTR_IN(1) void CC
LOCAL_libvideo_swblitter3__blit__mask1msb__vline(struct video_blitter3 const *__restrict self,
                                                 byte_t *out_line, size_t out_stride, video_coord_t out_x,
                                                 byte_t const *src_line, size_t src_stride, video_coord_t src_x,
                                                 byte_t const *dst_line, size_t dst_stride,
                                                 byte_t dst_mask, video_dim_t size_y) {
#ifdef LOCAL_IS_SAMEFMT
	video_codec_linecopy_t vc_linecopy = self->vbt3_wrdst->vx_buffer->vb_format.vf_codec->vc_linecopy;
#else /* LOCAL_IS_SAMEFMT */
	video_codec_setpixel_t vc_setpixel = self->vbt3_wrdst->vx_buffer->vb_format.vf_codec->vc_setpixel;
	video_codec_getpixel_t vc_getpixel = self->vbt3_src->vx_buffer->vb_format.vf_codec->vc_getpixel;
#endif /* !LOCAL_IS_SAMEFMT */
	LOCAL_load_conv;
	gfx_assert(dst_mask != 0);
	gfx_assert(dst_mask != 0xff);
	for (;;) {
		/* Selected mask bits of this row; consumed from the MSB downwards. */
		byte_t pending     = *dst_line & dst_mask;
		video_coord_t wr_x = out_x;
		video_coord_t rd_x = src_x;
		while (pending != 0) {
			/* Step over the run of clear bits in front of the next set bit. */
			shift_t gap = CLZ(pending);
			wr_x += gap;
			rd_x += gap;
			pending <<= gap;

			/* The MSB of "pending" is now set: copy the run of set bits. */
#ifdef LOCAL_IS_SAMEFMT
			{
				/* Length of the run of set bits == leading zeros of the inverse. */
				shift_t run = CLZ((byte_t)~pending);
				gfx_assert(run);
				(*vc_linecopy)(out_line, wr_x, src_line, rd_x, run);
				wr_x += run;
				rd_x += run;
				pending <<= run;
			}
#else /* LOCAL_IS_SAMEFMT */
			do {
				video_pixel_t src_pixel = (*vc_getpixel)(src_line, rd_x);
				video_pixel_t out_pixel = video_converter_mappixel(conv, src_pixel);
				(*vc_setpixel)(out_line, wr_x, out_pixel);
				++wr_x;
				++rd_x;
				pending <<= 1;
			} while (pending & 0x80);
#endif /* !LOCAL_IS_SAMEFMT */
		}

		/* Advance all three buffers to the next row. */
		src_line += src_stride;
		dst_line += dst_stride;
		out_line += out_stride;
		if (!--size_y)
			break;
	}
}
#endif /* LOCAL_IS_BLEND1 */
246302

247303
/* 3-way blit using a 1-bit-per-pixel, MSB-first mask: pixels of `src' are
 * written to `out' wherever the corresponding bit of `dst' (the mask) is set.
 *
 * When `LOCAL_IS_BLEND1' is defined, the function first tries to acquire
 * region locks on all 3 buffers and then operates directly on the locked
 * memory (one mask byte == 8 pixels at a time). If any of the locks cannot
 * be acquired, the already-acquired ones are released and the generic
 * `*__bypixel' implementation is used instead.
 *
 * @param self:            The blitter (provides `vbt3_wrdst', `vbt3_rddst', `vbt3_src')
 * @param out_x,out_y:     Top-left pixel of the output region
 * @param dst_x,dst_y:     Top-left pixel of the mask region
 * @param src_x,src_y:     Top-left pixel of the source region
 * @param size_x,size_y:   Dimensions of the region (the locked path assumes
 *                         both are non-zero) */
INTERN ATTR_IN(1) void CC
LOCAL_libvideo_swblitter3__blit__mask1msb(struct video_blitter3 const *__restrict self,
                                          video_coord_t out_x, video_coord_t out_y,
                                          video_coord_t dst_x, video_coord_t dst_y,
                                          video_coord_t src_x, video_coord_t src_y,
                                          video_dim_t size_x, video_dim_t size_y) {
#ifdef LOCAL_IS_BLEND1
	struct video_regionlock out_lock;
	struct video_gfx const *out = self->vbt3_wrdst;
	struct video_gfx const *dst = self->vbt3_rddst;
	struct video_gfx const *src = self->vbt3_src;
#endif /* LOCAL_IS_BLEND1 */
	TRACE_START(LOCAL_LOGIDENT_swblitter3__blit__mask1msb "("
	            "out: {%" PRIuCRD "x%" PRIuCRD "}, "
	            "dst: {%" PRIuCRD "x%" PRIuCRD "}, "
	            "src: {%" PRIuCRD "x%" PRIuCRD "}, "
	            "dim: {%" PRIuDIM "x%" PRIuDIM "})\n",
	            out_x, out_y, dst_x, dst_y, src_x, src_y, size_x, size_y);
#ifdef LOCAL_IS_BLEND1
	if (LL_wlockregion(out->vx_buffer, &out_lock, out_x, out_y, size_x, size_y)) {
		struct video_regionlock dst_lock;
		if (LL_rlockregion(dst->vx_buffer, &dst_lock, dst_x, dst_y, size_x, size_y)) {
			struct video_regionlock src_lock;
			if (LL_rlockregion(src->vx_buffer, &src_lock, src_x, src_y, size_x, size_y)) {
				/* Use video locks
				 * NOTE(review): `vrl_xbas' is treated as the pixel offset (within
				 * the locked line data) at which the region starts -- confirm
				 * against the definition of `struct video_regionlock'. */
				video_coord_t used_src_x = src_lock.vrl_xbas;
				video_coord_t used_dst_x = dst_lock.vrl_xbas;
				video_coord_t used_out_x = out_lock.vrl_xbas;
				byte_t const *src_line = src_lock.vrl_lock.vl_data;
				byte_t const *dst_line = dst_lock.vrl_lock.vl_data;
				byte_t *out_line = out_lock.vrl_lock.vl_data;
#ifdef LOCAL_IS_SAMEFMT
				video_codec_linecopy_t vc_linecopy = out->vx_buffer->vb_format.vf_codec->vc_linecopy;
#else /* LOCAL_IS_SAMEFMT */
				video_codec_setpixel_t vc_setpixel = out->vx_buffer->vb_format.vf_codec->vc_setpixel;
				video_codec_getpixel_t vc_getpixel = src->vx_buffer->vb_format.vf_codec->vc_getpixel;
#endif /* !LOCAL_IS_SAMEFMT */
				LOCAL_load_conv;

				/* Deal with unaligned leading bits */
				if (used_dst_x) {
					dst_line += (used_dst_x >> 3); /* Skip whole mask bytes */
					used_dst_x &= 7;               /* Bit offset within first mask byte */
					if (used_dst_x) {
						byte_t leading_mask;
						shift_t leading_unaligned = 8 - (shift_t)used_dst_x;
						if (leading_unaligned > size_x)
							leading_unaligned = (shift_t)size_x;
						/* used_dst_x = 1, leading_unaligned = 7: leading_mask = 0x7f
						 * used_dst_x = 2, leading_unaligned = 6: leading_mask = 0x3f
						 * used_dst_x = 2, leading_unaligned = 5: leading_mask = 0x3e
						 * used_dst_x = 2, leading_unaligned = 4: leading_mask = 0x3c
						 * ... */
						leading_mask = (((byte_t)1 << leading_unaligned) - 1)
						               << (8 - (used_dst_x + leading_unaligned));
						/* The vline helper maps mask bit `k' (counted from the MSB) to
						 * pixel `x + k'. The first pixel of this blit lives at bit
						 * `used_dst_x', so the X coordinates must be biased by that
						 * amount. The subtraction may wrap, but the helper only ever
						 * uses `x + k' with `k >= used_dst_x' (all higher bits are
						 * cleared by `leading_mask'), which wraps back into range. */
						LOCAL_libvideo_swblitter3__blit__mask1msb__vline(self,
						                                                 out_line, out_lock.vrl_lock.vl_stride, used_out_x - used_dst_x,
						                                                 src_line, src_lock.vrl_lock.vl_stride, used_src_x - used_dst_x,
						                                                 dst_line, dst_lock.vrl_lock.vl_stride, leading_mask, size_y);
						if unlikely(leading_unaligned >= size_x)
							goto done_lock; /* Blit consisted of **only** unaligned bits. */
						used_out_x += leading_unaligned;
						used_src_x += leading_unaligned;
						size_x -= leading_unaligned;
						/* The partial mask byte has been fully consumed: everything
						 * below must start at the next (now byte-aligned) mask byte. */
						++dst_line;
						/*used_dst_x = 0;*/
						gfx_assert(size_x != 0);
					}
				}

				/* Deal with unaligned trailing bits */
				if (size_x & 7) {
					shift_t trailing_unaligned = (shift_t)(size_x & 7);
					byte_t trailing_mask = ~(0xff >> trailing_unaligned);
					LOCAL_libvideo_swblitter3__blit__mask1msb__vline(self,
					                                                 out_line, out_lock.vrl_lock.vl_stride, used_out_x + (size_x & ~7),
					                                                 src_line, src_lock.vrl_lock.vl_stride, used_src_x + (size_x & ~7),
					                                                 dst_line + (size_x >> 3), dst_lock.vrl_lock.vl_stride, trailing_mask, size_y);
					size_x &= ~7;
					/* No whole mask bytes left. Must still release the locks
					 * (and reach TRACE_END), so don't `return' from here. */
					if unlikely(!size_x)
						goto done_lock;
				}

				/* Copy pixels specified by byte-aligned bitmask */
				size_x >>= 3; /* From here on, `size_x' counts mask bytes */
				gfx_assert(size_x);
				do {
					byte_t const *dst_iter = dst_line;
					video_dim_t iter_x_byte = 0;
					video_coord_t out_line_x = used_out_x;
					video_coord_t src_line_x = used_src_x;
					do {
						byte_t mask = *dst_iter++;
						shift_t skip;
						++iter_x_byte;
						if (!mask) {
							/* Nothing selected by this mask byte */
							out_line_x += 8;
							src_line_x += 8;
						} else {
							skip = CLZ(mask);
#ifdef LOCAL_IS_SAMEFMT
							/* Mask is a single run of set bits reaching down to the LSB */
							if (mask == (((byte_t)1 << (8 - skip)) - 1)) {
								video_dim_t n = 8 - skip;
								out_line_x += skip;
								src_line_x += skip;
								/* Optimize for cross-byte continuous masks. */
								while (iter_x_byte < size_x && (*dst_iter == 0xff)) {
									++iter_x_byte;
									++dst_iter;
									n += 8;
								}
								(*vc_linecopy)(out_line, out_line_x, src_line, src_line_x, n);
								if (iter_x_byte >= size_x)
									break;
								out_line_x += n;
								src_line_x += n;
							} else
#endif /* LOCAL_IS_SAMEFMT */
							{
								/* General case: walk the runs of set bits within this byte */
								video_coord_t out_word_x = out_line_x;
								video_coord_t src_word_x = src_line_x;
								for (;;) {
									mask <<= skip;
									out_word_x += skip;
									src_word_x += skip;
#ifdef LOCAL_IS_SAMEFMT
									gfx_assert(mask != 0xff);
									{
										shift_t set = CLZ((byte_t)~mask);
										(*vc_linecopy)(out_line, out_word_x, src_line, src_word_x, set);
										out_word_x += set;
										src_word_x += set;
										mask <<= set;
									}
#else /* LOCAL_IS_SAMEFMT */
									do {
										video_pixel_t sp = (*vc_getpixel)(src_line, src_word_x);
										video_pixel_t op = video_converter_mappixel(conv, sp);
										(*vc_setpixel)(out_line, out_word_x, op);
										++out_word_x;
										++src_word_x;
										mask <<= 1;
									} while (mask & 0x80);
#endif /* !LOCAL_IS_SAMEFMT */
									if (!mask)
										break;
									skip = CLZ(mask);
								}
								out_line_x += 8;
								src_line_x += 8;
							}
						}
					} while (iter_x_byte < size_x);
					out_line += out_lock.vrl_lock.vl_stride;
					src_line += src_lock.vrl_lock.vl_stride;
					dst_line += dst_lock.vrl_lock.vl_stride;
				} while (--size_y);
done_lock:
				LL_unlockregion(src->vx_buffer, &src_lock);
				LL_unlockregion(dst->vx_buffer, &dst_lock);
				LL_unlockregion(out->vx_buffer, &out_lock);
				goto done;
			}
			LL_unlockregion(dst->vx_buffer, &dst_lock);
		}
		LL_unlockregion(out->vx_buffer, &out_lock);
	}
#endif /* LOCAL_IS_BLEND1 */
	/* Fallback (or only implementation when !LOCAL_IS_BLEND1): pixel-by-pixel */
	LOCAL_libvideo_swblitter3__blit__mask1msb__bypixel(self, out_x, out_y, dst_x, dst_y,
	                                                   src_x, src_y, size_x, size_y);
#ifdef LOCAL_IS_BLEND1
done:
#endif /* LOCAL_IS_BLEND1 */
	TRACE_END(LOCAL_LOGIDENT_swblitter3__blit__mask1msb "()\n");
}
265477

@@ -448,6 +660,7 @@ LOCAL_libvideo_swblitter3__stretch_imatrix__mask1msb_n(struct video_blitter3 con
448660
#undef LOCAL_libvideo_swblitter3__stretch_imatrix__mask1msb_n__bypixel
449661

450662
#undef LOCAL_libvideo_swblitter3__blit__mask1msb
663+
#undef LOCAL_libvideo_swblitter3__blit__mask1msb__vline
451664
#undef LOCAL_libvideo_swblitter3__stretch__mask1msb_l
452665
#undef LOCAL_libvideo_swblitter3__stretch__mask1msb_n
453666
#undef LOCAL_libvideo_swblitter3__blit_imatrix__mask1msb

0 commit comments

Comments
 (0)