Skip to content

Commit a248d9c

Browse files
committed
Implemented eff_hsla_alpha for AArch64 ASIMD.
1 parent 646ad7e commit a248d9c

File tree

5 files changed

+120
-6
lines changed

5 files changed

+120
-6
lines changed

CHANGELOG

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Implemented axis_apply_log1 and axis_apply_log2 optimized for AArch64 ASIMD.
77
* Implemented fill_rgba and fill_hsla for AArch64 ASIMD.
88
* Implemented rgba_to_hsla, hsla_to_rgba, rgba_to_bgra32, rgba32_to_bgra32 for AArch64 ASIMD.
9-
* Implemented eff_hsla_hue, eff_hsla_sat, eff_hsla_light for AArch64 ASIMD.
9+
* Implemented eff_hsla_hue, eff_hsla_sat, eff_hsla_light, eff_hsla_alpha for AArch64 ASIMD.
1010

1111
=== 1.0.6 ===
1212
* Updated build scripts.

include/private/dsp/arch/aarch64/asimd/graphics/effects.h

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,120 @@ namespace lsp
425425

426426
#undef EFF_HSLA_LIGHT_CORE
427427

428+
/*
 * Shared SIMD core of eff_hsla_alpha.
 * Turns two 4-float vectors of input values (v0, v1) into alpha values and
 * copies the constant h, s, l vectors into the registers used by the
 * interleaving st4 stores that follow each expansion of this macro.
 * On exit: {v0, v1, v2, v3} = {h, s, l, V(v[0])} and {v4, v5, v6, v7} = {h, s, l, V(v[1])}.
 */
#define EFF_HSLA_ALPHA_CORE \
429+
/* v0 = v[0] (first vector of input values) */ \
430+
/* v1 = v[1] (second vector of input values) */ \
431+
/* v8 = h */ \
432+
/* v9 = s */ \
433+
/* v10 = l */ \
434+
/* v11 = a (not read by this core) */ \
435+
/* v14 = 0 */ \
436+
/* v15 = 1 */ \
437+
__ASM_EMIT("fsub v2.4s, v15.4s, v0.4s") /* v2 = 1 - v[0] */ \
438+
__ASM_EMIT("fsub v6.4s, v15.4s, v1.4s") /* v6 = 1 - v[1] */ \
439+
__ASM_EMIT("fcmgt v4.4s, v14.4s, v0.4s") /* v4 = [0 > v[0]] per-lane mask */ \
440+
__ASM_EMIT("fcmgt v5.4s, v14.4s, v1.4s") /* v5 = [0 > v[1]] per-lane mask */ \
441+
__ASM_EMIT("fadd v3.4s, v0.4s, v15.4s") /* v3 = v[0] + 1 */ \
442+
__ASM_EMIT("fadd v7.4s, v1.4s, v15.4s") /* v7 = v[1] + 1 */ \
443+
__ASM_EMIT("bif v3.16b, v2.16b, v4.16b") /* v3 = V = (v+1)&[0>v] | (1-v)&~[0>v]; result lands in v3, the 4th st4 register */ \
444+
__ASM_EMIT("bif v7.16b, v6.16b, v5.16b") /* v7 = V for the second vector */ \
445+
__ASM_EMIT("mov v0.16b, v8.16b") /* v0 = h (input values in v0 are no longer needed) */ \
446+
__ASM_EMIT("mov v1.16b, v9.16b") /* v1 = s */ \
447+
__ASM_EMIT("mov v2.16b, v10.16b") /* v2 = l */ \
448+
__ASM_EMIT("mov v4.16b, v8.16b") /* v4 = h */ \
449+
__ASM_EMIT("mov v5.16b, v9.16b") /* v5 = s */ \
450+
__ASM_EMIT("mov v6.16b, v10.16b") /* v6 = l */
451+
452+
/*
453+
value = v[i];
454+
value = (0.0f > value) ? 1.0f + value : 1.0f - value;
455+
456+
dst[0] = eff->h;
457+
dst[1] = eff->s;
458+
dst[2] = eff->l;
459+
dst[3] = value; // Fill alpha channel
460+
*/
461+
462+
/* Constants for eff_hsla_alpha: fetched as one q-register pair (q14, q15) by a single ldp. */
IF_ARCH_AARCH64
463+
(
464+
static const float EFF_HSLA_ALPHA_XC[] __lsp_aligned16 =
465+
{
466+
LSP_DSP_VEC4(0.0f), /* goes to v14: zero, compared against the input values */
467+
LSP_DSP_VEC4(1.0f) /* goes to v15: one, used for 1 + v and 1 - v */
468+
};
469+
)
470+
471+
/**
 * AArch64 ASIMD implementation of the HSLA alpha effect.
 *
 * For every input value one 4-float pixel is stored to dst: the first three
 * components are the h, s, l values read from eff, the fourth is
 * (0 > value) ? 1 + value : 1 - value.
 *
 * @param dst   output buffer, receives 4 floats per processed input value
 * @param v     input values, one float per output pixel
 * @param eff   effect parameters; four consecutive floats are read from this
 *              address (h, s, l, a), of which only h, s, l are used here
 * @param count number of input values to process
 */
void eff_hsla_alpha(float *dst, const float *v, const dsp::hsla_alpha_eff_t *eff, size_t count)
472+
{
473+
ARCH_AARCH64_ASM
474+
(
475+
__ASM_EMIT("ld4r {v8.4s, v9.4s, v10.4s, v11.4s}, [%[eff]]") /* v8 = h, v9 = s, v10 = l, v11 = a (each replicated to all lanes) */
476+
__ASM_EMIT("ldp q14, q15, [%[XC]]") /* v14 = 0.0, v15 = 1.0 */
477+
478+
//-----------------------------------------------------------------
479+
// 8x blocks
480+
__ASM_EMIT("subs %[count], %[count], #8")
481+
__ASM_EMIT("b.lo 2f") /* fewer than 8 values: go to the tail */
482+
__ASM_EMIT("1:")
483+
__ASM_EMIT("ldp q0, q1, [%[src]]") /* v0 = v[0..3], v1 = v[4..7] */
484+
EFF_HSLA_ALPHA_CORE
485+
__ASM_EMIT("subs %[count], %[count], #8") /* flags are consumed by b.hs below; st4/add in between do not touch them */
486+
__ASM_EMIT("st4 {v0.4s, v1.4s, v2.4s, v3.4s}, [%[dst]]") /* interleaved store: 4 pixels of {h, s, l, V} */
487+
__ASM_EMIT("add %[dst], %[dst], 0x40")
488+
__ASM_EMIT("add %[src], %[src], 0x20")
489+
__ASM_EMIT("st4 {v4.4s, v5.4s, v6.4s, v7.4s}, [%[dst]]") /* next 4 pixels */
490+
__ASM_EMIT("add %[dst], %[dst], 0x40")
491+
__ASM_EMIT("b.hs 1b") /* repeat while at least 8 more values remained */
492+
493+
//-----------------------------------------------------------------
494+
// 1x-7x block (remaining values after the 8x loop)
495+
__ASM_EMIT("2:")
496+
__ASM_EMIT("adds %[count], %[count], #8") /* undo the last subtraction: count = number of remaining values */
497+
__ASM_EMIT("b.ls 14f") /* nothing left */
498+
__ASM_EMIT("tst %[count], #4")
499+
__ASM_EMIT("b.eq 4f")
500+
__ASM_EMIT("ldr q0, [%[src]]") /* 4 values -> v0 */
501+
__ASM_EMIT("add %[src], %[src], 0x10")
502+
__ASM_EMIT("4:")
503+
__ASM_EMIT("tst %[count], #2")
504+
__ASM_EMIT("b.eq 6f")
505+
__ASM_EMIT("ld1 {v1.2s}, [%[src]]") /* 2 values -> lanes 0..1 of v1 */
506+
__ASM_EMIT("add %[src], %[src], 0x08")
507+
__ASM_EMIT("6:")
508+
__ASM_EMIT("tst %[count], #1")
509+
__ASM_EMIT("b.eq 8f")
510+
__ASM_EMIT("ld1 {v1.s}[2], [%[src]]") /* 1 value -> lane 2 of v1 */
511+
__ASM_EMIT("8:")
512+
EFF_HSLA_ALPHA_CORE
513+
__ASM_EMIT("tst %[count], #4")
514+
__ASM_EMIT("b.eq 10f")
515+
__ASM_EMIT("st4 {v0.4s, v1.4s, v2.4s, v3.4s}, [%[dst]]") /* 4 pixels computed from v0 */
516+
__ASM_EMIT("add %[dst], %[dst], 0x40")
517+
__ASM_EMIT("10:")
518+
__ASM_EMIT("tst %[count], #2")
519+
__ASM_EMIT("b.eq 12f")
520+
__ASM_EMIT("st4 {v4.2s, v5.2s, v6.2s, v7.2s}, [%[dst]]") /* 2 pixels from lanes 0..1, matching the 2-value load */
521+
__ASM_EMIT("add %[dst], %[dst], 0x20")
522+
__ASM_EMIT("12:")
523+
__ASM_EMIT("tst %[count], #1")
524+
__ASM_EMIT("b.eq 14f")
525+
__ASM_EMIT("st4 {v4.s, v5.s, v6.s, v7.s}[2], [%[dst]]") /* 1 pixel from lane 2, matching the 1-value load */
526+
// End
527+
__ASM_EMIT("14:")
528+
529+
: [dst] "+r" (dst), [src] "+r" (v), [count] "+r" (count)
530+
: [eff] "r" (eff),
531+
[XC] "r" (&EFF_HSLA_ALPHA_XC[0])
532+
: "cc", "memory",
533+
"v0", "v1", "v2", "v3",
534+
"v4", "v5", "v6", "v7",
535+
"v8", "v9", "v10", "v11",
536+
"v12", "v13", "v14", "v15" /* v12, v13 are declared clobbered although the code above does not reference them */
537+
);
538+
}
539+
540+
#undef EFF_HSLA_ALPHA_CORE
541+
428542
} /* namespace asimd */
429543
} /* namespace lsp */
430544

src/main/aarch64/asimd.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,7 @@
413413
EXPORT1(eff_hsla_hue);
414414
EXPORT1(eff_hsla_sat);
415415
EXPORT1(eff_hsla_light);
416-
// EXPORT1(eff_hsla_alpha);
416+
EXPORT1(eff_hsla_alpha);
417417
}
418418
} /* namespace asimd */
419419
} /* namespace lsp */

src/test/ptest/graphics/effects.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ namespace lsp
166166
void eff_hsla_hue(float *dst, const float *v, const dsp::hsla_hue_eff_t *eff, size_t count);
167167
void eff_hsla_sat(float *dst, const float *v, const dsp::hsla_sat_eff_t *eff, size_t count);
168168
void eff_hsla_light(float *dst, const float *v, const dsp::hsla_light_eff_t *eff, size_t count);
169-
// void eff_hsla_alpha(float *dst, const float *v, const dsp::hsla_alpha_eff_t *eff, size_t count);
169+
void eff_hsla_alpha(float *dst, const float *v, const dsp::hsla_alpha_eff_t *eff, size_t count);
170170
}
171171
)
172172
}
@@ -265,7 +265,7 @@ template <class eff_t>
265265
IF_ARCH_X86(call("sse2::eff_hsla_alpha", dst, src, count, &alpha, sse2::eff_hsla_alpha));
266266
IF_ARCH_X86_64(call("avx2::x64_eff_hsla_alpha", dst, src, count, &alpha, avx2::x64_eff_hsla_alpha));
267267
IF_ARCH_ARM(call("neon_d32::eff_hsla_alpha", dst, src, count, &alpha, neon_d32::eff_hsla_alpha));
268-
// IF_ARCH_AARCH64(call("asimd::eff_hsla_alpha", dst, src, count, &alpha, asimd::eff_hsla_alpha));
268+
IF_ARCH_AARCH64(call("asimd::eff_hsla_alpha", dst, src, count, &alpha, asimd::eff_hsla_alpha));
269269

270270
PTEST_SEPARATOR2;
271271
}

src/test/utest/graphics/effects.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ namespace lsp
7070
void eff_hsla_hue(float *dst, const float *v, const dsp::hsla_hue_eff_t *eff, size_t count);
7171
void eff_hsla_sat(float *dst, const float *v, const dsp::hsla_sat_eff_t *eff, size_t count);
7272
void eff_hsla_light(float *dst, const float *v, const dsp::hsla_light_eff_t *eff, size_t count);
73-
// void eff_hsla_alpha(float *dst, const float *v, const dsp::hsla_alpha_eff_t *eff, size_t count);
73+
void eff_hsla_alpha(float *dst, const float *v, const dsp::hsla_alpha_eff_t *eff, size_t count);
7474
}
7575
)
7676
}
@@ -178,7 +178,7 @@ UTEST_BEGIN("dsp.graphics", effects)
178178
IF_ARCH_AARCH64(call("asimd::eff_hsla_hue", 16, generic::eff_hsla_hue, asimd::eff_hsla_hue, &hue));
179179
IF_ARCH_AARCH64(call("asimd::eff_hsla_sat", 16, generic::eff_hsla_sat, asimd::eff_hsla_sat, &sat));
180180
IF_ARCH_AARCH64(call("asimd::eff_hsla_light", 16, generic::eff_hsla_light, asimd::eff_hsla_light, &light));
181-
// IF_ARCH_AARCH64(call("asimd::eff_hsla_alpha", 16, generic::eff_hsla_alpha, asimd::eff_hsla_alpha, &alpha));
181+
IF_ARCH_AARCH64(call("asimd::eff_hsla_alpha", 16, generic::eff_hsla_alpha, asimd::eff_hsla_alpha, &alpha));
182182
}
183183

184184
UTEST_END;

0 commit comments

Comments
 (0)