Skip to content

Commit 646ad7e

Browse files
committed
Implemented eff_hsla_hue, eff_hsla_sat, eff_hsla_light for AArch64 ASIMD.
1 parent b1a0b01 commit 646ad7e

File tree

5 files changed

+131
-7
lines changed

5 files changed

+131
-7
lines changed

CHANGELOG

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Implemented axis_apply_log1 and axis_apply_log2 optimized for AArch64 ASIMD.
77
* Implemented fill_rgba and fill_hsla for AArch64 ASIMD.
88
* Implemented rgba_to_hsla, hsla_to_rgba, rgba_to_bgra32, rgba32_to_bgra32 for AArch64 ASIMD.
9-
* Implemented eff_hsla_hue, eff_hsla_sat for AArch64 ASIMD.
9+
* Implemented eff_hsla_hue, eff_hsla_sat, eff_hsla_light for AArch64 ASIMD.
1010

1111
=== 1.0.6 ===
1212
* Updated build scripts.

include/private/dsp/arch/aarch64/asimd/graphics/effects.h

Lines changed: 125 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ namespace lsp
292292

293293
: [dst] "+r" (dst), [src] "+r" (v), [count] "+r" (count),
294294
[eff] "+r" (eff)
295-
: [XC] "r" (&EFF_HSLA_HUE_XC[0])
295+
:
296296
: "cc", "memory",
297297
"v0", "v1", "v2", "v3",
298298
"v4", "v5", "v6", "v7",
@@ -301,6 +301,130 @@ namespace lsp
301301
);
302302
}
303303

304+
/*
 * Shared computation core of eff_hsla_light: processes two vectors of four
 * input values at once and leaves two groups of planar H/S/L/A results in
 * v0-v3 and v4-v7, ready to be interleaved by an st4 store.
 *
 * Expected on entry:
 *   v0, v1   - input values (two vectors)
 *   v8..v10  - broadcast h, s, l (v11 = a is loaded by the caller but not read here)
 *   v14, v15 - broadcast threshold T and its reciprocal KT
 * On exit:
 *   v0 = h, v1 = s, v2 = L, v3 = A   (results for the first input vector)
 *   v4 = h, v5 = s, v6 = L, v7 = A   (results for the second input vector)
 */
#define EFF_HSLA_LIGHT_CORE \
305+
/* v0 = v[0] (first input vector) */ \
306+
/* v1 = v[1] (second input vector) */ \
307+
/* v8 = h */ \
308+
/* v9 = s */ \
309+
/* v10 = l */ \
310+
/* v11 = a */ \
311+
/* v14 = T */ \
312+
/* v15 = KT */ \
313+
__ASM_EMIT("fabs v5.4s, v1.4s") /* v5 = V1 = abs(v1), taken first because v1 is overwritten next */ \
314+
__ASM_EMIT("fabs v1.4s, v0.4s") /* v1 = V = abs(v) */ \
315+
__ASM_EMIT("fsub v3.4s, v14.4s, v1.4s") /* v3 = T - V */ \
316+
__ASM_EMIT("fsub v7.4s, v14.4s, v5.4s") /* v7 = T - V1 */ \
317+
__ASM_EMIT("fcmgt v2.4s, v3.4s, #0.0") /* v2 = [(T-V) > 0] */ \
318+
__ASM_EMIT("fcmgt v6.4s, v7.4s, #0.0") /* v6 = [(T-V1) > 0] */ \
319+
__ASM_EMIT("fmul v3.4s, v3.4s, v15.4s") /* v3 = (T-V)*KT */ \
320+
__ASM_EMIT("fmul v7.4s, v7.4s, v15.4s") /* v7 = (T-V1)*KT */ \
321+
__ASM_EMIT("bit v1.16b, v14.16b, v2.16b") /* v1 = EL = V&[(T-V) <= 0] | T&[(T-V) > 0] */ \
322+
__ASM_EMIT("bit v5.16b, v14.16b, v6.16b") /* v5 = EL1 = V1&[(T-V1) <= 0] | T&[(T-V1) > 0] */ \
323+
__ASM_EMIT("and v3.16b, v3.16b, v2.16b") /* v3 = A = ((T-V)*KT) & [(T-V) > 0] */ \
324+
__ASM_EMIT("and v7.16b, v7.16b, v6.16b") /* v7 = A1 = ((T-V1)*KT) & [(T-V1) > 0] */ \
325+
__ASM_EMIT("fmul v2.4s, v1.4s, v10.4s") /* v2 = EL*l = L */ \
326+
__ASM_EMIT("fmul v6.4s, v5.4s, v10.4s") /* v6 = EL1*l = L1 */ \
327+
__ASM_EMIT("mov v0.16b, v8.16b") /* v0 = h */ \
328+
__ASM_EMIT("mov v1.16b, v9.16b") /* v1 = s */ \
329+
__ASM_EMIT("mov v4.16b, v8.16b") /* v4 = h */ \
330+
__ASM_EMIT("mov v5.16b, v9.16b") /* v5 = s */
331+
332+
/*
333+
kt = 1.0f / eff->thresh;
334+
value = (value >= 0.0f) ? value : -value;
335+
336+
if ((eff->thresh - value) <= 0)
337+
{
338+
dst[2] = eff->l * value;
339+
dst[3] = 0.0f;
340+
}
341+
else
342+
{
343+
dst[2] = eff->l * eff->thresh;
344+
dst[3] = (eff->thresh - value) * kt;
345+
}
346+
347+
dst[0] = eff->h;
348+
dst[1] = eff->s;
349+
*/
350+
351+
/*
 * Lightness effect, AArch64 ASIMD implementation.
 *
 * For each of the `count` floats read from `v`, stores one interleaved
 * 4-float tuple to `dst` (16 bytes of output per input value):
 *   [0] = eff->h
 *   [1] = eff->s
 *   [2] = eff->l * ((thresh - |value|) > 0 ? thresh : |value|)
 *   [3] = (thresh - |value|) > 0 ? (thresh - |value|) / thresh : 0
 *
 * The division by thresh is replaced by a multiplication with a reciprocal
 * computed once up front (frecpe estimate refined by two frecps steps).
 *
 * NOTE(review): the loads below assume hsla_light_eff_t starts with four
 * consecutive floats h, s, l, a followed by thresh at byte offset 0x10 -
 * confirm against the structure declaration.
 *
 * dst   - output buffer, must hold 4 * count floats
 * v     - input values, count floats
 * eff   - effect parameters
 * count - number of input values to process
 */
void eff_hsla_light(float *dst, const float *v, const dsp::hsla_light_eff_t *eff, size_t count)
352+
{
353+
ARCH_AARCH64_ASM
354+
(
355+
__ASM_EMIT("ld4r {v8.4s, v9.4s, v10.4s, v11.4s}, [%[eff]]") /* v8 = h, v9 = s, v10 = l, v11 = a (each broadcast to all lanes) */
356+
__ASM_EMIT("add %[eff], %[eff], #0x10") /* step over the four floats just read */
357+
__ASM_EMIT("ld1r {v14.4s}, [%[eff]]") /* v14 = T */
358+
__ASM_EMIT("frecpe v0.4s, v14.4s") /* v0 = TD (initial estimate of 1/T) */
359+
__ASM_EMIT("frecps v1.4s, v0.4s, v14.4s") /* v1 = (2 - TD*T) */
360+
__ASM_EMIT("fmul v0.4s, v1.4s, v0.4s") /* v0 = t' = TD * (2 - TD*T) */
361+
__ASM_EMIT("frecps v1.4s, v0.4s, v14.4s") /* v1 = (2 - t'*T) */
362+
__ASM_EMIT("fmul v15.4s, v1.4s, v0.4s") /* v15 = KT = 1/T = t' * (2 - t'*T) */
363+
364+
//-----------------------------------------------------------------
365+
// 8x blocks
366+
__ASM_EMIT("subs %[count], %[count], #8")
367+
__ASM_EMIT("b.lo 2f") /* fewer than 8 values: go to the tail */
368+
__ASM_EMIT("1:")
369+
__ASM_EMIT("ldp q0, q1, [%[src]]") /* v0 = v[0], v1 = v[1] */
370+
EFF_HSLA_LIGHT_CORE /* v0-v3 and v4-v7 = H, S, L, A for both vectors */
371+
__ASM_EMIT("subs %[count], %[count], #8") /* flags set here are consumed by b.hs below */
372+
__ASM_EMIT("st4 {v0.4s, v1.4s, v2.4s, v3.4s}, [%[dst]]") /* interleaved store of the first 4 tuples */
373+
__ASM_EMIT("add %[dst], %[dst], 0x40")
374+
__ASM_EMIT("add %[src], %[src], 0x20")
375+
__ASM_EMIT("st4 {v4.4s, v5.4s, v6.4s, v7.4s}, [%[dst]]") /* interleaved store of the next 4 tuples */
376+
__ASM_EMIT("add %[dst], %[dst], 0x40")
377+
__ASM_EMIT("b.hs 1b")
378+
379+
//-----------------------------------------------------------------
380+
// Tail: 1..7 remaining values
381+
__ASM_EMIT("2:")
382+
__ASM_EMIT("adds %[count], %[count], #8") /* undo the last subtraction: count = remaining values */
383+
__ASM_EMIT("b.ls 14f") /* nothing left: done */
384+
__ASM_EMIT("tst %[count], #4")
385+
__ASM_EMIT("b.eq 4f")
386+
__ASM_EMIT("ldr q0, [%[src]]") /* 4 values -> v0 */
387+
__ASM_EMIT("add %[src], %[src], 0x10")
388+
__ASM_EMIT("4:")
389+
__ASM_EMIT("tst %[count], #2")
390+
__ASM_EMIT("b.eq 6f")
391+
__ASM_EMIT("ld1 {v1.2s}, [%[src]]") /* 2 values -> lanes 0-1 of v1 */
392+
__ASM_EMIT("add %[src], %[src], 0x08")
393+
__ASM_EMIT("6:")
394+
__ASM_EMIT("tst %[count], #1")
395+
__ASM_EMIT("b.eq 8f")
396+
__ASM_EMIT("ld1 {v1.s}[2], [%[src]]") /* 1 value -> lane 2 of v1 */
397+
__ASM_EMIT("8:")
398+
EFF_HSLA_LIGHT_CORE /* lanes that were not loaded produce values that are never stored */
399+
__ASM_EMIT("tst %[count], #4")
400+
__ASM_EMIT("b.eq 10f")
401+
__ASM_EMIT("st4 {v0.4s, v1.4s, v2.4s, v3.4s}, [%[dst]]") /* 4 tuples from the v0 group */
402+
__ASM_EMIT("add %[dst], %[dst], 0x40")
403+
__ASM_EMIT("10:")
404+
__ASM_EMIT("tst %[count], #2")
405+
__ASM_EMIT("b.eq 12f")
406+
__ASM_EMIT("st4 {v4.2s, v5.2s, v6.2s, v7.2s}, [%[dst]]") /* 2 tuples from lanes 0-1 of the v1 group */
407+
__ASM_EMIT("add %[dst], %[dst], 0x20")
408+
__ASM_EMIT("12:")
409+
__ASM_EMIT("tst %[count], #1")
410+
__ASM_EMIT("b.eq 14f")
411+
__ASM_EMIT("st4 {v4.s, v5.s, v6.s, v7.s}[2], [%[dst]]") /* 1 tuple from lane 2 of the v1 group */
412+
// End
413+
__ASM_EMIT("14:")
414+
415+
: [dst] "+r" (dst), [src] "+r" (v), [count] "+r" (count),
416+
[eff] "+r" (eff)
417+
:
418+
: "cc", "memory",
419+
"v0", "v1", "v2", "v3",
420+
"v4", "v5", "v6", "v7",
421+
"v8", "v9", "v10", "v11",
422+
"v12", "v13", "v14", "v15"
423+
);
424+
}
425+
426+
#undef EFF_HSLA_LIGHT_CORE
427+
304428
} /* namespace asimd */
305429
} /* namespace lsp */
306430

src/main/aarch64/asimd.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -412,7 +412,7 @@
412412

413413
EXPORT1(eff_hsla_hue);
414414
EXPORT1(eff_hsla_sat);
415-
// EXPORT1(eff_hsla_light);
415+
EXPORT1(eff_hsla_light);
416416
// EXPORT1(eff_hsla_alpha);
417417
}
418418
} /* namespace asimd */

src/test/ptest/graphics/effects.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ namespace lsp
165165
{
166166
void eff_hsla_hue(float *dst, const float *v, const dsp::hsla_hue_eff_t *eff, size_t count);
167167
void eff_hsla_sat(float *dst, const float *v, const dsp::hsla_sat_eff_t *eff, size_t count);
168-
// void eff_hsla_light(float *dst, const float *v, const dsp::hsla_light_eff_t *eff, size_t count);
168+
void eff_hsla_light(float *dst, const float *v, const dsp::hsla_light_eff_t *eff, size_t count);
169169
// void eff_hsla_alpha(float *dst, const float *v, const dsp::hsla_alpha_eff_t *eff, size_t count);
170170
}
171171
)
@@ -257,7 +257,7 @@ template <class eff_t>
257257
IF_ARCH_X86(call("sse2::eff_hsla_light", dst, src, count, &light, sse2::eff_hsla_light));
258258
IF_ARCH_X86_64(call("avx2::x64_eff_hsla_light", dst, src, count, &light, avx2::x64_eff_hsla_light));
259259
IF_ARCH_ARM(call("neon_d32::eff_hsla_light", dst, src, count, &light, neon_d32::eff_hsla_light));
260-
// IF_ARCH_AARCH64(call("asimd::eff_hsla_light", dst, src, count, &light, asimd::eff_hsla_light));
260+
IF_ARCH_AARCH64(call("asimd::eff_hsla_light", dst, src, count, &light, asimd::eff_hsla_light));
261261
PTEST_SEPARATOR;
262262

263263
call("static::eff_hsla_alpha", dst, src, count, &alpha, eff_hsla_alpha);

src/test/utest/graphics/effects.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ namespace lsp
6969
{
7070
void eff_hsla_hue(float *dst, const float *v, const dsp::hsla_hue_eff_t *eff, size_t count);
7171
void eff_hsla_sat(float *dst, const float *v, const dsp::hsla_sat_eff_t *eff, size_t count);
72-
// void eff_hsla_light(float *dst, const float *v, const dsp::hsla_light_eff_t *eff, size_t count);
72+
void eff_hsla_light(float *dst, const float *v, const dsp::hsla_light_eff_t *eff, size_t count);
7373
// void eff_hsla_alpha(float *dst, const float *v, const dsp::hsla_alpha_eff_t *eff, size_t count);
7474
}
7575
)
@@ -177,7 +177,7 @@ UTEST_BEGIN("dsp.graphics", effects)
177177

178178
IF_ARCH_AARCH64(call("asimd::eff_hsla_hue", 16, generic::eff_hsla_hue, asimd::eff_hsla_hue, &hue));
179179
IF_ARCH_AARCH64(call("asimd::eff_hsla_sat", 16, generic::eff_hsla_sat, asimd::eff_hsla_sat, &sat));
180-
// IF_ARCH_AARCH64(call("asimd::eff_hsla_light", 16, generic::eff_hsla_light, asimd::eff_hsla_light, &light));
180+
IF_ARCH_AARCH64(call("asimd::eff_hsla_light", 16, generic::eff_hsla_light, asimd::eff_hsla_light, &light));
181181
// IF_ARCH_AARCH64(call("asimd::eff_hsla_alpha", 16, generic::eff_hsla_alpha, asimd::eff_hsla_alpha, &alpha));
182182
}
183183

0 commit comments

Comments
 (0)